browser-use icon indicating copy to clipboard operation
browser-use copied to clipboard

Ollama example broken

Open arsen3d opened this issue 11 months ago • 12 comments

import os

# Optional: Disable telemetry
# os.environ["ANONYMIZED_TELEMETRY"] = "false"

# Optional: Set the OLLAMA host to a remote server
os.environ["OLLAMA_HOST"] = "http://localhost:11434"

import asyncio
from browser_use import Agent
from langchain_ollama import ChatOllama


async def run_search() -> str:
    agent = Agent(
        task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.",
        llm=ChatOllama(
            model="qwen2.5:32b-instruct-q4_K_M",
            num_ctx=32000,
        ),
    )

    result = await agent.run()
    return result


async def main():
    result = await run_search()
    print("\n\n", result)


if __name__ == "__main__":
    asyncio.run(main())

Terminal Log:

(base) PS C:\Users\arsen\repos\tools> python .\qwen.py   
INFO     [browser_use] BrowserUse logging setup complete with level info
INFO     [root] Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
INFO     [agent] 🚀 Starting task: Go to fahdmirza.com and return me the title of first blog post from home page.
INFO     [agent]
📍 Step 1
response {'raw': AIMessage(content='<tool_call>\n{"name": "go_to_url", "args": {"url": "https://fahdmirza.com"}}\n</tool_call>', additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-01-13T06:07:43.2262085Z', 'done': True, 'done_reason': 'stop', 'total_duration': 5119936400, 'load_duration': 4080449500, 'prompt_eval_count': 1798, 'prompt_eval_duration': 471000000, 'eval_count': 28, 'eval_duration': 232000000, 'message': Message(role='assistant', content='<tool_call>\n{"name": "go_to_url", "args": {"url": "https://fahdmirza.com"}}\n</tool_call>', images=None, tool_calls=None)}, id='run-a2d06cfe-f85d-4c93-ab65-e757a9893976-0', usage_metadata={'input_tokens': 1798, 'output_tokens': 28, 'total_tokens': 1826}), 'parsed': None, 'parsing_error': None}
ERROR    [agent] ❌ Result failed 1/3 times:
 Could not parse response.
INFO     [agent]
📍 Step 1
response {'raw': AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-01-13T06:07:44.913768Z', 'done': True, 'done_reason': 'stop', 'total_duration': 491553500, 'load_duration': 12114300, 'prompt_eval_count': 1816, 'prompt_eval_duration': 17000000, 'eval_count': 55, 'eval_duration': 424000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-014a3a90-5b3a-405a-9e59-ace35b468666-0', tool_calls=[{'name': 'AgentOutput', 'args': {'action': [], 'current_state': {'evaluation_previous_goal': 'Unknown - No previous actions to evaluate.', 'memory': '', 'next_goal': 'Navigate to fahdmirza.com'}}, 'id': 'c67a922d-c329-4e0d-b5cc-568e17af709f', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1816, 'output_tokens': 55, 'total_tokens': 1871}), 'parsed': AgentOutput(current_state=AgentBrain(evaluation_previous_goal='Unknown - No previous actions to evaluate.', memory='', next_goal='Navigate to fahdmirza.com'), action=[]), 'parsing_error': None}
INFO     [agent] 🤷 Eval: Unknown - No previous actions to evaluate.
INFO     [agent] 🧠 Memory:
INFO     [agent] 🎯 Next goal: Navigate to fahdmirza.com
INFO     [agent] 
📍 Step 2
response {'raw': AIMessage(content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-01-13T06:07:46.8335417Z', 'done': True, 'done_reason': 'stop', 'total_duration': 711811700, 'load_duration': 12522100, 'prompt_eval_count': 1890, 'prompt_eval_duration': 14000000, 'eval_count': 80, 'eval_duration': 619000000, 'message': Message(role='assistant', content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', images=None, tool_calls=None)}, id='run-e9e0c7ca-be82-49d5-96c7-e5286b820d17-0', usage_metadata={'input_tokens': 1890, 'output_tokens': 80, 'total_tokens': 1970}), 'parsed': None, 'parsing_error': None}    
ERROR    [agent] ❌ Result failed 1/3 times:
 Could not parse response.
INFO     [agent]
📍 Step 2
response {'raw': AIMessage(content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-01-13T06:07:48.7782908Z', 'done': True, 'done_reason': 'stop', 'total_duration': 715041500, 'load_duration': 16863000, 'prompt_eval_count': 1908, 'prompt_eval_duration': 15000000, 'eval_count': 80, 'eval_duration': 620000000, 'message': Message(role='assistant', content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', images=None, tool_calls=None)}, id='run-9c8e21f3-bfdc-4625-9e97-7b675bae2604-0', usage_metadata={'input_tokens': 1908, 'output_tokens': 80, 'total_tokens': 1988}), 'parsed': None, 'parsing_error': None}    
ERROR    [agent] ❌ Result failed 2/3 times:
 Could not parse response.
INFO     [agent]
📍 Step 2
response {'raw': AIMessage(content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-01-13T06:07:50.6908514Z', 'done': True, 'done_reason': 'stop', 'total_duration': 719308200, 'load_duration': 17107000, 'prompt_eval_count': 1926, 'prompt_eval_duration': 17000000, 'eval_count': 80, 'eval_duration': 620000000, 'message': Message(role='assistant', content='{\n  "current_state": {\n    "evaluation_previous_goal": "Unknown - No previous actions to evaluate.",\n    "memory": "",\n    "next_goal": "Navigate to fahdmirza.com"\n  },\n  "action": [\n    {\n      "go_to_url": {\n        "url": "https://fahdmirza.com"\n      }\n    }\n  ]\n}', images=None, tool_calls=None)}, id='run-7102b95e-a07a-4210-ade3-cf578f4721f9-0', usage_metadata={'input_tokens': 1926, 'output_tokens': 80, 'total_tokens': 2006}), 'parsed': None, 'parsing_error': None}    
ERROR    [agent] ❌ Result failed 3/3 times:
 Could not parse response.
ERROR    [agent] ❌ Stopping due to 3 consecutive failures
INFO     [agent] Created GIF at agent_history.gif


 AgentHistoryList(all_results=[ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True)], all_model_outputs=[])

Can https://github.com/browser-use/browser-use/blob/main/examples/ollama.py be added to tests? https://github.com/browser-use/browser-use/blob/main/tests/test_qwen.py

arsen3d avatar Jan 13 '25 06:01 arsen3d

Here is what my working example looks like:


# Optional: Set the OLLAMA host to a remote server
os.environ["OLLAMA_HOST"] = "http://localhost:11434"

import asyncio
from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from langchain_ollama import ChatOllama


async def run_search() -> AgentHistoryList:
    agent = Agent(
        task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.",
        llm=ChatOllama(
           model='qwen2.5:latest',
           num_ctx=128000,
        ),
        tool_call_in_content=False,
    )

    result = await agent.run()
    return result


async def main():
    result = await run_search()
    print("\n\n", result)


if __name__ == "__main__":
    asyncio.run(main())

I've changed: model='qwen2.5:latest', num_ctx=128000, tool_call_in_content=False, FYI: qwen2.5:latest 845dbda0ea48 4.7 GB 24 hours ago qwen2.5:32b-instruct-q4_K_M 9f13ba1299af 19 GB 25 hours ago

It would be

arsen3d avatar Jan 13 '25 20:01 arsen3d

os.environ["OLLAMA_HOST"] = "http://localhost:11434"

import asyncio from browser_use import Agent from browser_use.agent.views import AgentHistoryList from langchain_ollama import ChatOllama

async def run_search() -> AgentHistoryList: agent = Agent( task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.", llm=ChatOllama( model='qwen2.5:latest', num_ctx=128000, ), tool_call_in_content=False, )

result = await agent.run()
return result

async def main(): result = await run_search() print("\n\n", result)

if __name__ == "__main__": asyncio.run(main())

How exactly are you getting your to work? I keep getting this error when trying to use the qwen2.5-coder:latest model. Are you not running into this?

(.venv) C:\Programs\browser-use>c:/Programs/browser-use/.venv/Scripts/python.exe c:/Programs/browser-use/ollama2.py INFO [browser_use] BrowserUse logging setup complete with level info INFO [root] Anonymized telemetry enabled. See https://github.com/browser-use/browser-use for more information. INFO [agent] 🚀 Starting task: Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 1/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [browser] Failed to update state: Page.screenshot: Timeout 30000ms exceeded. Call log:

  • taking page screenshot
  • - disabled all CSS animations
    
    • waiting for fonts to load...
    • fonts loaded

ERROR [agent] ❌ Result failed 2/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 3/3 times: Could not parse response. ERROR [agent] ❌ Stopping due to 3 consecutive failures INFO [agent] Created GIF at agent_history.gif

AgentHistoryList(all_results=[ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True)], all_model_outputs=[])

TheLocalLab avatar Jan 15 '25 03:01 TheLocalLab

os.environ["OLLAMA_HOST"] = "http://localhost:11434" import asyncio from browser_use import Agent from browser_use.agent.views import AgentHistoryList from langchain_ollama import ChatOllama async def run_search() -> AgentHistoryList: agent = Agent( task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.", llm=ChatOllama( model='qwen2.5:latest', num_ctx=128000, ), tool_call_in_content=False, )

result = await agent.run()
return result

async def main(): result = await run_search() print("\n\n", result) if __name__ == "__main__": asyncio.run(main())

How exactly are you getting your to work? I keep getting this error when trying to use the qwen2.5-coder:latest model. Are you not running into this?

(.venv) C:\Programs\browser-use>c:/Programs/browser-use/.venv/Scripts/python.exe c:/Programs/browser-use/ollama2.py INFO [browser_use] BrowserUse logging setup complete with level info INFO [root] Anonymized telemetry enabled. See https://github.com/browser-use/browser-use for more information. INFO [agent] 🚀 Starting task: Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 1/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [browser] Failed to update state: Page.screenshot: Timeout 30000ms exceeded. Call log:

  • taking page screenshot
  • - disabled all CSS animations
    
    • waiting for fonts to load...
    • fonts loaded

ERROR [agent] ❌ Result failed 2/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 3/3 times: Could not parse response. ERROR [agent] ❌ Stopping due to 3 consecutive failures INFO [agent] Created GIF at agent_history.gif

AgentHistoryList(all_results=[ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True)], all_model_outputs=[])

Paste in the code you are trying to run.

arsen3d avatar Jan 21 '25 18:01 arsen3d

Wow, thank you for this @arsen3d!! 🎉 🙌 The [Ollama example](https://github.com/browser-use/browser-use/blob/main/examples/ollama.py) works for me now!

-- Follow-up --

After some bisecting to figure out the culprit(s), the only thing that mattered for me to change were related to the AgentHistoryList type:

  1. adding the import from browser_use.agent.views import AgentHistoryList
  2. Updating line 14 from the example from a str: to a AgentHistoryList: (async def run_search() -> str: => async def run_search() -> AgentHistoryList:)

These other small tweaks you made didn't matter for me:

  1. model='qwen2.5:latest', (keeping the original model="qwen2.5:32b-instruct-q4_K_M", worked fine for me, even after pulling latest)
  2. num_ctx=128000, (keeping the original num_ctx=32000, from the Ollama example didn't need to change because I could still use the original qwen model and that context window size)
  3. tool_call_in_content=False, -- I tried both with and without this set and the example worked fine.

(And minor note around your block of code, in case some folks try to just copy and paste it and don't have a linter like ruff to catch it: you forgot the import os at the top...that said, the os.environ["OLLAMA_HOST"] = "http://x.x.x.x:11434" does seem optional if you're just running it locally at that default port for ollama anyway)

esthor avatar Jan 24 '25 17:01 esthor

os.environ["OLLAMA_HOST"] = "http://localhost:11434" import asyncio from browser_use import Agent from browser_use.agent.views import AgentHistoryList from langchain_ollama import ChatOllama async def run_search() -> AgentHistoryList: agent = Agent( task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.", llm=ChatOllama( model='qwen2.5:latest', num_ctx=128000, ), tool_call_in_content=False, )

result = await agent.run()
return result

async def main(): result = await run_search() print("\n\n", result) if __name__ == "__main__": asyncio.run(main())

How exactly are you getting your to work? I keep getting this error when trying to use the qwen2.5-coder:latest model. Are you not running into this? (.venv) C:\Programs\browser-use>c:/Programs/browser-use/.venv/Scripts/python.exe c:/Programs/browser-use/ollama2.py INFO [browser_use] BrowserUse logging setup complete with level info INFO [root] Anonymized telemetry enabled. See https://github.com/browser-use/browser-use for more information. INFO [agent] 🚀 Starting task: Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 1/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [browser] Failed to update state: Page.screenshot: Timeout 30000ms exceeded. Call log:

  • taking page screenshot
  • - disabled all CSS animations
    
    • waiting for fonts to load...
    • fonts loaded

ERROR [agent] ❌ Result failed 2/3 times: Could not parse response. INFO [agent] 📍 Step 1 ERROR [agent] ❌ Result failed 3/3 times: Could not parse response. ERROR [agent] ❌ Stopping due to 3 consecutive failures INFO [agent] Created GIF at agent_history.gif AgentHistoryList(all_results=[ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True), ActionResult(is_done=False, extracted_content=None, error='Could not parse response.\n\nReturn a valid JSON object with the required fields.', include_in_memory=True)], all_model_outputs=[])

Paste in the code you are trying to run.

Pretty much the same code as yours.

import asyncio from browser_use import Agent from browser_use.agent.views import AgentHistoryList from langchain_ollama import ChatOllama

async def run_search() -> AgentHistoryList: agent = Agent( task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.", llm=ChatOllama( model='qwen2.5-coder:latest', num_ctx=16000, ), tool_call_in_content=False, )

result = await agent.run()
return result

async def main(): result = await run_search() print("\n\n", result)

if __name__ == "__main__": asyncio.run(main())

TheLocalLab avatar Jan 26 '25 06:01 TheLocalLab

@esthor The link slightly changed, and it works

https://github.com/browser-use/browser-use/blob/main/examples/models/ollama.py

I only updated the model from original to qwen2.5:7b

Thank you

cyb3rsalih avatar Jan 28 '25 07:01 cyb3rsalih

@cyb3rsalih nice! Yeah, we updated that example file with this PR https://github.com/browser-use/browser-use/pull/373 just a couple hours after my last comment above. 🚀

Glad it's working for you! 🎉

@MagMueller -- This issue is resolved. Can we close it? ✅

esthor avatar Feb 03 '25 16:02 esthor

(I added the same comment in PR #373 )

I don't think this solved the problem. This function does return AgentHistoryList and you have corrected the typing.

But the problem is still there. These models are flaky and if you run the test 10 times, it will fail at least 50% with:

ERROR    [agent] ❌ Result failed 1/3 times:
 Could not parse response.

rjambrecic avatar Feb 05 '25 09:02 rjambrecic

@rjambrecic -- are you running the updated ollama.py exactly as it is here: https://github.com/browser-use/browser-use/blob/main/examples/models/ollama.py ?

If you are trying with a different model, try going back to model="qwen2.5:32b-instruct-q4_K_M", and seeing if it works.

I've encountered the error you're getting when using some other models. (Not all the ones listed on ollama's site with the tools tag work well in my experience.)

(I added the same comment in PR #373 )

I don't think this solved the problem. This function does return AgentHistoryList and you have corrected the typing.

But the problem is still there. These models are flaky and if you run the test 10 times, it will fail at least 50% with:

ERROR [agent] ❌ Result failed 1/3 times: Could not parse response.

esthor avatar Feb 09 '25 20:02 esthor

Image, I believe the parsing error is still there.

yamlik2024 avatar Feb 28 '25 04:02 yamlik2024

I'm using Qwen2.5 and I was able to make it work by making the following changes:

  1. Add tool_calling_method = 'json_mode' to the agent specification.
  2. Apply the changes suggested by #736
  3. Add the following line under item 5 Task Completion of site-packages/browser_use/agent/system_prompt.md (see attached) - Make sure the done action output takes the following format [{{"done": {{"success": bool, "text": str }}}}]

Explanations: 1 help fix the parsing errors due to the specific layout of the qwen model. May work on other Ollama models. 2 resolve some of the more challenging cases. 3 resolve the task non-completion error or endless loops created by the fact that some models do not return valid "done" actions.

Here is a revised example with local proxy.

import os
 # Clear proxy environment variables before any other imports; not needed if you do not need proxy
os.environ.pop('ALL_PROXY', None)
os.environ.pop('all_proxy', None)   

import asyncio
from dotenv import load_dotenv
from playwright._impl._api_structures import ProxySettings

from langchain_ollama import ChatOllama
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.agent.views import AgentHistoryList

load_dotenv()

llm = ChatOllama(
  model="qwen2.5:14b",
  num_ctx=32000,
  temperature=0.0,
)

dummy_proxy = ProxySettings(server="socks5://127.0.0.1:1080")
browser = Browser(
    config=BrowserConfig(
        headless=True,
        proxy=dummy_proxy  #remove this line if you do not need proxy.
    )
)

async def run_search() -> AgentHistoryList:
    agent = Agent(
        task=(
        '1. Go to https://google.com'
        "2. Search for 'browser use' in the search bar"
        '3. Click search'
        '4. Call done'
        ),
        llm=llm,
        browser=browser,
        use_vision=False,
        tool_calling_method = 'json_mode'
    )

    result = await agent.run()
    return result

async def main():
    result = await run_search()
    print("\n\n", result)

if __name__ == "__main__":
    asyncio.run(main())

system_prompt.md

hanlulong avatar Mar 14 '25 22:03 hanlulong

A fix for models that don't support our usual tool calling format has just been merged, please pull main and try again: https://github.com/browser-use/browser-use/pull/736

pirate avatar Mar 26 '25 23:03 pirate

I've had good results with langchain-ollama==0.3.0

uv pip uninstall langchain-ollama uv pip install langchain-ollama==0.3.0

and mistral-small:24b or hf.co/bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF:Q4_K_L

ascotai avatar Apr 12 '25 18:04 ascotai

This is a duplicate of the Ollama parsing issues reported in #220 and other issues. The "Could not parse response" error occurs because local Ollama models don't consistently generate the structured JSON format that browser-use expects.

Cfomodz avatar Aug 27 '25 06:08 Cfomodz