gpt-researcher
gpt-researcher copied to clipboard
Publisher agent "directory not found" when using multi agent approach with local docs
Hi. I have previously faced several issues with multi_agent approach. #684 has solved one of them. But now there is another issue when using local docs as knowledge source and not following guidelines. It creates the folder based on the task but publisher_agent
throws this error when trying to save the report:
PUBLISHER: Publishing final research report based on retrieved data...
Traceback (most recent call last):
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\main.py", line 62, in <module>
asyncio.run(main())
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\asyncio\runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\asyncio\runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 653, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\main.py", line 57, in main
research_report = await chief_editor.run_research_task()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\master.py", line 65, in run_research_task
result = await chain.ainvoke({"task": self.task})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\pregel\__init__.py", line 1316, in ainvoke
async for chunk in self.astream(
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\contextlib.py", line 222, in __aexit__
await self.gen.athrow(typ, value, traceback)
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\contextlib.py", line 222, in __aexit__
await self.gen.athrow(typ, value, traceback)
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\channels\base.py", line 57, in afrom_checkpoint
yield value
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\channels\manager.py", line 37, in AsyncChannelsManager
yield {
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\pregel\__init__.py", line 1195, in astream
_panic_or_proceed(done, inflight, loop.step, asyncio.TimeoutError)
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\pregel\__init__.py", line 1349, in _panic_or_proceed
raise exc
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\pregel\executor.py", line 123, in done
task.result()
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\pregel\retry.py", line 74, in arun_with_retry
await task.proc.ainvoke(task.input, task.config)
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langchain_core\runnables\base.py", line 2911, in ainvoke
input = await step.ainvoke(input, config, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\langgraph\utils.py", line 124, in ainvoke
ret = await asyncio.create_task(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\publisher.py", line 133, in run
final_research_report = await self.publish_research_report(research_state, publish_formats)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\publisher.py", line 90, in publish_research_report
await self.write_report_by_formats(layout, publish_formats)
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\publisher.py", line 124, in write_report_by_formats
await write_text_to_md(layout, self.output_dir)
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\publisher.py", line 156, in write_text_to_md
await write_to_file(md_path, md_content)
File "C:\Users\Emil\Desktop\gpt-researcher\multi_agents\agents\publisher.py", line 141, in write_to_file
async with aiofiles.open(filename, "w", encoding='utf-8') as file:
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\aiofiles\base.py", line 63, in __aenter__
return await self
^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\aiofiles\base.py", line 59, in __await__
self._obj = yield from self._coro.__await__()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\Desktop\gpt-researcher\RESEARCHER_ENV\Lib\site-packages\aiofiles\threadpool\__init__.py", line 92, in _open
f = await loop.run_in_executor(executor, cb)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Emil\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures\thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: './outputs/run_1722601499_Your task is to write a small report on \\report.md'
So, I said maybe I can solve it by explicitly creating the folder (although I thought it was already there as i could see it in outputs folder). Came up with this in publisher.py
:
import os
import aiofiles
from .utils.file_formats import (
write_md_to_pdf,
write_md_to_word,
write_text_to_md,
)
from .utils.views import print_agent_output
class PublisherAgent:
def __init__(self, output_dir: str, websocket=None, stream_output=None, headers=None):
self.websocket = websocket
self.stream_output = stream_output
self.output_dir = output_dir
self.headers = headers or {}
# Ensure the output directory exists
os.makedirs(self.output_dir, exist_ok=True)
async def publish_research_report(self, research_state: dict, publish_formats: dict):
layout = self.generate_layout(research_state)
await self.write_report_by_formats(layout, publish_formats)
return layout
def generate_layout(self, research_state: dict):
sections = '\n\n'.join(f"{value}"
for subheader in research_state.get("research_data")
for key, value in subheader.items())
references = '\n'.join(f"{reference}" for reference in research_state.get("sources"))
headers = research_state.get("headers")
layout = f"""# {headers.get('title')}
#### {headers.get("date")}: {research_state.get('date')}
## {headers.get("introduction")}
{research_state.get('introduction')}
## {headers.get("table_of_contents")}
{research_state.get('table_of_contents')}
{sections}
## {headers.get("conclusion")}
{research_state.get('conclusion')}
## {headers.get("references")}
{references}
"""
return layout
async def write_report_by_formats(self, layout: str, publish_formats: dict):
if publish_formats.get("pdf"):
await write_md_to_pdf(layout, self.output_dir)
if publish_formats.get("docx"):
await write_md_to_word(layout, self.output_dir)
if publish_formats.get("markdown"):
await write_text_to_md(layout, self.output_dir)
async def run(self, research_state: dict):
task = research_state.get("task")
publish_formats = task.get("publish_formats")
if self.websocket and self.stream_output:
await self.stream_output("logs", "publishing", f"Publishing final research report based on retrieved data...", self.websocket)
else:
print_agent_output(output="Publishing final research report based on retrieved data...", agent="PUBLISHER")
final_research_report = await self.publish_research_report(research_state, publish_formats)
return {"report": final_research_report}
# utils/file_formats.py
async def write_to_file(filename: str, text: str):
# Ensure the directory exists
os.makedirs(os.path.dirname(filename), exist_ok=True)
async with aiofiles.open(filename, "w", encoding='utf-8') as file:
await file.write(text)
async def write_md_to_pdf(md_content: str, output_dir: str):
# Assume a function that converts markdown to PDF and writes to a file
pdf_path = os.path.join(output_dir, 'report.pdf')
await write_to_file(pdf_path, md_content)
async def write_md_to_word(md_content: str, output_dir: str):
# Assume a function that converts markdown to Word and writes to a file
word_path = os.path.join(output_dir, 'report.docx')
await write_to_file(word_path, md_content)
async def write_text_to_md(md_content: str, output_dir: str):
md_path = os.path.join(output_dir, 'report.md')
await write_to_file(md_path, md_content)
However, it still gives the same error. it does output the text, but the error appears to be when publishing to a file.
My task.json
:
{
"query": "Your task is to write a small report on Working capital structure of Pernod Ricard. the years are from 2012 to 2023 but 2020 and 2015 are missing. covid and economic instability issues with the numbers. Write about main components and their interplay. Do not mention the table numbers separately. They will be given as one big table and you will reference the table itself.",
"max_sections": 1,
"publish_formats": {
"markdown": true,
"pdf": false,
"docx": false
},
"source": "local",
"follow_guidelines": false,
"model": "gpt-4o-mini",
"guidelines": [
"Something"
],
"verbose": true
}
I also tried to solve it by editing file_formats.py
in utils
like this, but error persisted.
async def write_to_file(filename: str, text: str) -> None:
"""Asynchronously write text to a file in UTF-8 encoding.
Args:
filename (str): The filename to write to.
text (str): The text to write.
"""
# Ensure the directory exists
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Convert text to UTF-8, replacing any problematic characters
text_utf8 = text.encode('utf-8', errors='replace').decode('utf-8')
async with aiofiles.open(filename, "w", encoding='utf-8') as file:
await file.write(text_utf8)
I did my best based on my limited understanding. If someone has some ideas, i welcome them.