[Bug]: Thrift Connection Error During Batch File Parsing - Frontend Refresh Fails (Connection refused)
Self Checks
- [x] I have searched for existing issues, including closed ones.
- [x] I confirm that I am using English to submit this report (Language Policy).
- [x] Non-English title submissions will be closed directly ( 非英文标题的提交将会被直接关闭 ) (Language Policy).
- [x] Please do not modify this template :) and fill in all the required fields.
RAGFlow workspace code commit ID
Release v0.18.0 created from 94181a9
RAGFlow image version
v0.18.0
Other environment information
Actual behavior
我使用代码按批次解析未解析的文件
import time
from ragflow_sdk import RAGFlow
# Connection settings for the RAGFlow server; the key and host shown here are
# redacted placeholders.
API_KEY = "ragflow-*****"
BASE_URL = "http://ip:80"
rag_object = RAGFlow(api_key=API_KEY, base_url=BASE_URL)
# Use the first dataset returned for the name "janes"; the [0] lookup fails
# if the query returns nothing.
dataset = rag_object.list_datasets(name="janes")[0]
# Request documents with page_size=22000 — presumably large enough to fetch
# every document in one call; TODO confirm against the SDK's paging rules.
all_docs = dataset.list_documents(page_size= 22000)
all_ids = [doc.id for doc in all_docs]
print(all_docs)
print(f"共获取到 {len(all_ids)} 个文档,准备分批进行解析...")
# Only documents whose run status is not "DONE" are submitted for parsing.
unparsed_docs = [doc for doc in all_docs if doc.run != "DONE"]
print(f"共有 {len(unparsed_docs)} 个文档尚未解析。")
# Number of documents sent to the parser in one batch.
BATCH_SIZE = 10
# Default upper bound, in seconds, on how long to wait for one batch.
WAIT_TIMEOUT = 600
# Default pause, in seconds, between two rounds of status polling.
CHECK_INTERVAL = 5
def wait_for_batch_finish(documents, timeout=WAIT_TIMEOUT, check_interval=CHECK_INTERVAL):
    """Poll the dataset until every document in *documents* stops parsing.

    Each polling round looks up every still-pending document through the
    module-level ``dataset`` and drops it from the pending set once its run
    status is "DONE" or "FAIL". Polling stops when nothing is pending or when
    *timeout* seconds have elapsed.

    Args:
        documents: Iterable of document objects exposing ``id`` and ``name``.
        timeout: Maximum number of seconds to keep polling.
        check_interval: Seconds to sleep between two polling rounds.

    Returns:
        A dict mapping id to name for the documents that were still pending
        when polling stopped; empty when the whole batch finished in time.
    """
    # Use a monotonic clock so that a system clock adjustment (NTP sync,
    # manual change) can neither shorten nor extend the timeout.
    start_time = time.monotonic()
    remaining_docs = {doc.id: doc.name for doc in documents}
    print("remaining_docs", remaining_docs)

    while time.monotonic() - start_time < timeout and remaining_docs:
        print(f"⏳ 正在检查 {len(remaining_docs)} 个文档是否完成解析...")
        # Iterate over a snapshot of the keys: entries are deleted in the loop.
        for doc_id in list(remaining_docs):
            print(doc_id)
            doc_status = dataset.list_documents(id=doc_id)[0].run
            if doc_status == "DONE":
                print(f"✅ 解析完成: {remaining_docs[doc_id]}")
                del remaining_docs[doc_id]
            elif doc_status == "FAIL":
                # A failed document will not change state again, so stop
                # waiting for it instead of burning the whole timeout.
                print(f"⚠️ 解析失败(跳过): {remaining_docs[doc_id]}")
                del remaining_docs[doc_id]
            else:
                print(f"⌛ 等待中: {remaining_docs[doc_id]}(状态: {doc_status})")
        # Skip the sleep when the batch is already complete.
        if remaining_docs:
            time.sleep(check_interval)

    if remaining_docs:
        print(f"⚠️ 超时!仍有 {len(remaining_docs)} 个文档未完成:")
        for doc_id in remaining_docs:
            print(f" - {remaining_docs[doc_id]}")
    else:
        print("🎉 本批全部文档解析完成。")
    return remaining_docs
# Submit the unparsed documents one fixed-size batch at a time.
batch_offsets = range(0, len(unparsed_docs), BATCH_SIZE)
for i in batch_offsets:
    pending_batch = unparsed_docs[i:i + BATCH_SIZE]
    ids = [document.id for document in pending_batch]
    print(f"\n🚀 启动第 {i // BATCH_SIZE + 1} 批解析,共 {len(ids)} 个文档...")
    dataset.async_parse_documents(ids)
    # Wait for the current batch to finish before starting the next one.
    wait_for_batch_finish(pending_batch)

print("\n✅ 所有文档解析流程结束。")
在批量解析文件过程中,当尝试刷新前端或查看解析完成的文件详情时,系统抛出 Thrift 连接异常,导致:
无法打开已解析文件的详情
无法继续后续文件解析
服务端连接被拒绝 (192.168.64.2:23817) 代码提示报错
Traceback (most recent call last):
File "D:\code\新建文件夹\LLM-server\ragflow\parse_md.py", line 63, in <module>
dataset.async_parse_documents(ids)
File "D:\anaconda\envs\py312\Lib\site-packages\ragflow_sdk\modules\dataset.py", line 88, in async_parse_documents
raise Exception(res.get("message"))
Exception: TTransportException("Could not connect to any of [('192.168.64.2', 23817)]")
Expected behavior
No response
Steps to reproduce
上传大量文件,然后按批次解析,解析过程中刷新前端,查看文件解析块
Additional information
ragflow-server报错:
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 115, in wrapper
ret = func(self, *args, **kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 162, in get_database
return self.client.GetDatabase(GetDatabaseRequest(session_id=self.session_id,
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1173, in GetDatabase
return self.recv_GetDatabase()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1185, in recv_GetDatabase
(fname, mtype, rseqid) = iprot.readMessageBegin()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 134, in readMessageBegin
sz = self.readI32()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 217, in readI32
buff = self.trans.readAll(4)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 62, in readAll
chunk = self.read(sz - have)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 164, in read
self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size)))
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 169, in read
raise TTransportException(type=TTransportException.END_OF_FILE,
thrift.transport.TTransport.TTransportException: TSocket read 0 bytes
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 140, in open
handle.connect(sockaddr)
ConnectionRefusedError: [Errno 111] Connection refused
2025-05-20 14:28:18,829 INFO 20 Could not connect to ('192.168.64.2', 23817)
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 115, in wrapper
ret = func(self, *args, **kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 162, in get_database
return self.client.GetDatabase(GetDatabaseRequest(session_id=self.session_id,
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1173, in GetDatabase
return self.recv_GetDatabase()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1185, in recv_GetDatabase
(fname, mtype, rseqid) = iprot.readMessageBegin()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 134, in readMessageBegin
sz = self.readI32()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 217, in readI32
buff = self.trans.readAll(4)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 62, in readAll
chunk = self.read(sz - have)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 164, in read
self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size)))
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 169, in read
raise TTransportException(type=TTransportException.END_OF_FILE,
thrift.transport.TTransport.TTransportException: TSocket read 0 bytes
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 140, in open
handle.connect(sockaddr)
ConnectionRefusedError: [Errno 111] Connection refused
2025-05-20 14:28:18,832 ERROR 20 Could not connect to any of [('192.168.64.2', 23817)]
2025-05-20 14:28:18,831 ERROR 20 Could not connect to any of [('192.168.64.2', 23817)]
2025-05-20 14:28:18,833 ERROR 20 Could not connect to any of [('192.168.64.2', 23817)]
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 115, in wrapper
ret = func(self, *args, **kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 162, in get_database
return self.client.GetDatabase(GetDatabaseRequest(session_id=self.session_id,
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1173, in GetDatabase
return self.recv_GetDatabase()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 1185, in recv_GetDatabase
(fname, mtype, rseqid) = iprot.readMessageBegin()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 134, in readMessageBegin
sz = self.readI32()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 217, in readI32
buff = self.trans.readAll(4)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 62, in readAll
chunk = self.read(sz - have)
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 164, in read
self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size)))
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 169, in read
raise TTransportException(type=TTransportException.END_OF_FILE,
thrift.transport.TTransport.TTransportException: TSocket read 0 bytes
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/ragflow/api/apps/chunk_app.py", line 62, in list_chunk
sres = settings.retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
File "<@beartype(rag.nlp.search.Dealer.search) at 0x7f943c909fc0>", line 95, in search
File "/ragflow/rag/nlp/search.py", line 97, in search
res = self.dataStore.search(src, [], filters, [], orderBy, offset, limit, idx_names, kb_ids)
File "/ragflow/rag/utils/infinity_conn.py", line 298, in search
db_instance = inf_conn.get_database(self.dbName)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/utils.py", line 563, in wrapper
return func(*args, **kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity.py", line 89, in get_database
res = self._client.get_database(db_name)
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 120, in wrapper
self._reconnect()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 72, in _reconnect
self.transport.open()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 155, in open
return self.__trans.open()
File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 149, in open
raise TTransportException(type=TTransportException.NOT_OPEN, message=msg)
thrift.transport.TTransport.TTransportException: Could not connect to any of [('192.168.64.2', 23817)]
Exception ignored in: <function RemoteThriftInfinityConnection.__del__ at 0x7f943c8f2cb0>
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity.py", line 36, in __del__
self.disconnect()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity.py", line 138, in disconnect
raise InfinityException(res.error_code, res.error_msg)
infinity.common.InfinityException: (<ErrorCode.CLIENT_CLOSE: 6003>, 'Transport not open')
2025-05-20 14:28:18,835 WARNING 20 INFINITY indexExist Could not connect to any of [('192.168.64.2', 23817)]
Exception ignored in: <function RemoteThriftInfinityConnection.__del__ at 0x7f943c8f2cb0>
Traceback (most recent call last):
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity.py", line 36, in __del__
self.disconnect()
File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity.py", line 138, in disconnect
raise InfinityException(res.error_code, res.error_msg)
infinity.common.InfinityException: (<ErrorCode.CLIENT_CLOSE: 6003>, 'Transport not open')
Hello @shuai-dian! I'm here to help you with any bugs, questions, or contributions you might have while you wait for a human maintainer. Let's tackle this issue together!
To resolve the 'Connection refused' error in Thrift services within the RAGFlow system, ensure that the port configurations in 'docker/service_conf.yaml.template' match those in 'docker/.env'. This error typically occurs due to a mismatch between these configuration files, which prevents proper connection to the service. Aligning the port configurations between these two files should resolve the issue [1].
To continue talking to Dosu, mention @dosu.
Help Dosu learn! Give it feedback: Great Response | Irrelevant Answer | Incorrect Sources | Too Verbose | Hallucination | Bug Report | Other
@shuai-dian @yingfeng @rantav @panzi 这个错误解决了吗?
@99RRR 没有解决,数据量大的时候也就出现,可以先换成es数据库
手动重新设置切片方案,可以临时跳过这个问题。
Hello @shuai-dian! I'm here to help you with any bugs, questions, or contributions you might have while you wait for a human maintainer. Let's tackle this issue together!
To resolve the 'Connection refused' error in Thrift services within the RAGFlow system, ensure that the port configurations in 'docker/service_conf.yaml.template' match those in 'docker/.env'. This error typically occurs due to a mismatch between these configuration files, which prevents proper connection to the service. Aligning the port configurations between these two files should resolve the issue [1].
To continue talking to Dosu, mention @dosu.
Help Dosu learn! Give it feedback: Great Response | Irrelevant Answer | Incorrect Sources | Too Verbose | Hallucination | Bug Report | Other
[@dosu] I encountered the same problem in version v0.19.1. I have not modified the Infinity settings in either of those two configuration files, and the port values are the same in both, so your reply does not solve the problem. In my case the error also occurred during parsing: it appeared after I refreshed the page several times. The error log is as follows:
2025-09-05 18:05:15,439 INFO 21 Could not connect to ('172.18.0.4', 23817) Traceback (most recent call last): File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 115, in wrapper ret = func(self, *args, **kwargs) File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 269, in select return self.client.Select(SelectRequest(session_id=self.session_id, File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 629, in Select return self.recv_Select() File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 641, in recv_Select (fname, mtype, rseqid) = iprot.readMessageBegin() File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 134, in readMessageBegin sz = self.readI32() File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 217, in readI32 buff = self.trans.readAll(4) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 62, in readAll chunk = self.read(sz - have) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 164, in read self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size))) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 169, in read raise TTransportException(type=TTransportException.END_OF_FILE, thrift.transport.TTransport.TTransportException: TSocket read 0 bytes
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 140, in open handle.connect(sockaddr) ConnectionRefusedError: [Errno 111] Connection refused 2025-09-05 18:05:15,445 ERROR 21 Could not connect to any of [('172.18.0.4', 23817)] 2025-09-05 18:05:15,439 INFO 23 Could not connect to ('172.18.0.4', 23817) Traceback (most recent call last): File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 115, in wrapper ret = func(self, *args, **kwargs) File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/client.py", line 240, in insert return self.client.Insert( File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 533, in Insert return self.recv_Insert() File "/ragflow/.venv/lib/python3.10/site-packages/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py", line 545, in recv_Insert (fname, mtype, rseqid) = iprot.readMessageBegin() File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 134, in readMessageBegin sz = self.readI32() File "/ragflow/.venv/lib/python3.10/site-packages/thrift/protocol/TBinaryProtocol.py", line 217, in readI32 buff = self.trans.readAll(4) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 62, in readAll chunk = self.read(sz - have) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TTransport.py", line 164, in read self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size))) File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 169, in read raise TTransportException(type=TTransportException.END_OF_FILE, thrift.transport.TTransport.TTransportException: TSocket read 0 bytes
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/ragflow/.venv/lib/python3.10/site-packages/thrift/transport/TSocket.py", line 140, in open handle.connect(sockaddr) ConnectionRefusedError: [Errno 111] Connection refused