Cannot use "mixtral-8x7b-instruct-v0.1.Q4_K_M" as local LLM
Hello,
I'm trying to use paper-qa with a "mixtral-8x7b-instruct-v0.1.Q4_K_M" model served on a local network. The llamafile executable is launched with the "-cb -np 4 -a my-llm-model --embedding" options, as described in the documentation. It works fine and is reachable from other machines on the local network via a browser.
I tried the following code:
from paperqa import Settings, ask
import os
import litellm

litellm.set_verbose = True

os.environ["OPENAI_API_KEY"] = ""
os.environ["MISTRAL_API_KEY"] = ""

local_llm_config = dict(
    model_list=[
        dict(
            model_name="mistral/my-llm-model",
            litellm_params=dict(
                model="mistral/my-llm-model",
                # LLM model is deployed remotely; same issue when deployed on the same machine
                api_base="http://192.168.1.15:8080/",
                api_key="",
                temperature=0.1,
                frequency_penalty=1.5,
                max_tokens=512,
            ),
        )
    ]
)
answer = ask(
    "What manufacturing challenges are unique to bispecific antibodies?",
    settings=Settings(
        llm="mistral/my-llm-model",
        llm_config=local_llm_config,
        summary_llm="mistral/my-llm-model",
        summary_llm_config=local_llm_config,
        paper_directory="./Papers",
    ),
)
print(answer)
But I get this error:
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
InMemoryCache: set_cache. current size= 6
Traceback (most recent call last):
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_transports\default.py", line 72, in map_httpcore_exceptions
yield
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_transports\default.py", line 377, in handle_async_request
resp = await self._pool.handle_async_request(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\connection_pool.py", line 216, in handle_async_request
raise exc from None
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\connection_pool.py", line 196, in handle_async_request
response = await connection.handle_async_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\connection.py", line 101, in handle_async_request
return await self._connection.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\http11.py", line 143, in handle_async_request
raise exc
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\http11.py", line 93, in handle_async_request
await self._send_request_headers(**kwargs)
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_async\http11.py", line 151, in _send_request_headers
with map_exceptions({h11.LocalProtocolError: LocalProtocolError}):
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpcore\_exceptions.py", line 14, in map_exceptions
raise to_exc(exc) from exc
httpcore.LocalProtocolError: Illegal header value b'Bearer '
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\_base_client.py", line 1554, in _request
response = await self._client.send(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_client.py", line 1674, in send
response = await self._send_handling_auth(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_client.py", line 1702, in _send_handling_auth
response = await self._send_handling_redirects(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_client.py", line 1739, in _send_handling_redirects
response = await self._send_single_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_client.py", line 1776, in _send_single_request
response = await transport.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_transports\default.py", line 376, in handle_async_request
with map_httpcore_exceptions():
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\httpx\_transports\default.py", line 89, in map_httpcore_exceptions
raise mapped_exc(message) from exc
httpx.LocalProtocolError: Illegal header value b'Bearer '
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\llms\OpenAI\openai.py", line 835, in acompletion
headers, response = await self.make_openai_chat_completion_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\llms\OpenAI\openai.py", line 548, in make_openai_chat_completion_request
raise e
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\llms\OpenAI\openai.py", line 536, in make_openai_chat_completion_request
await openai_aclient.chat.completions.with_raw_response.create(
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\_legacy_response.py", line 370, in wrapped
return cast(LegacyAPIResponse[R], await func(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\resources\chat\completions.py", line 1412, in create
return await self._post(
^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\_base_client.py", line 1821, in post
return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\_base_client.py", line 1515, in request
return await self._request(
^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\openai\_base_client.py", line 1588, in _request
raise APIConnectionError(request=request) from err
openai.APIConnectionError: Connection error.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\main.py", line 434, in acompletion
response = await init_response
^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\llms\OpenAI\openai.py", line 866, in acompletion
raise OpenAIError(
litellm.llms.OpenAI.openai.OpenAIError: Connection error.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Jérome DUCREUX\Desktop\PaperQA\paper_qa.py", line 26, in <module>
answer = ask(
^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\agents\__init__.py", line 100, in ask
return get_loop().run_until_complete(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\asyncio\base_events.py", line 687, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\agents\main.py", line 61, in agent_query
response = await run_agent(docs, query, agent_type, **runner_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\agents\main.py", line 110, in run_agent
answer, agent_status = await run_fake_agent(query, docs, **runner_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\agents\main.py", line 165, in run_fake_agent
for search in await litellm_get_search_query(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\agents\helpers.py", line 56, in litellm_get_search_query
result = await model.run_prompt(
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\llms.py", line 182, in run_prompt
return await self._run_chat(
^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\llms.py", line 235, in _run_chat
chunk = await self.achat(messages)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\paperqa\llms.py", line 452, in achat
response = await self.router.acompletion(self.name, messages)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 739, in acompletion
raise e
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 727, in acompletion
response = await self.async_function_with_fallbacks(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 3043, in async_function_with_fallbacks
raise original_exception
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 2897, in async_function_with_fallbacks
response = await self.async_function_with_retries(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 3173, in async_function_with_retries
raise original_exception
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 3087, in async_function_with_retries
response = await self.make_call(original_function, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 3180, in make_call
response = await original_function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 878, in _acompletion
raise e
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\router.py", line 850, in _acompletion
response = await _response
^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\utils.py", line 1571, in wrapper_async
raise e
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\utils.py", line 1386, in wrapper_async
result = await original_function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\main.py", line 456, in acompletion
raise exception_type(
^^^^^^^^^^^^^^^
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\litellm_core_utils\exception_mapping_utils.py", line 2033, in exception_type
raise e
File "C:\ProgramData\anaconda3\envs\PaperQA_env\Lib\site-packages\litellm\litellm_core_utils\exception_mapping_utils.py", line 419, in exception_type
raise APIError(
litellm.exceptions.APIError: litellm.APIError: APIError: MistralException - Connection error.
Received Model Group=mistral/my-llm-model
Available Model Group Fallbacks=None LiteLLM Retried: 2 times, LiteLLM Max Retries: 3
Any idea how to fix this?
Thanks.
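The telltale line in the traceback is httpcore.LocalProtocolError: Illegal header value b'Bearer ': with api_key="", the OpenAI client builds an Authorization: Bearer header with an empty token, and h11 (the HTTP/1.1 library at the bottom of the stack) rejects the trailing space before the request ever leaves the machine. A minimal reproduction with h11, assuming only that an empty key produces that header:

import h11

# With api_key="" the client sends "Authorization: Bearer " (empty token);
# the trailing space fails h11's header-value validation.
try:
    h11.Request(
        method="GET",
        target="/v1/models",
        headers=[("Host", "192.168.1.15:8080"), ("Authorization", "Bearer ")],
    )
except h11.LocalProtocolError as e:
    print(e)  # Illegal header value b'Bearer '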
I finally managed to make it work with this config:
local_llm_config = dict(
    model_list=[
        dict(
            model_name="openai/my-llm-model",
            litellm_params=dict(
                model="openai/my-llm-model",
                api_base="http://192.168.1.15:8080/v1/",
                api_key="sk-no-key-required",
                verbose=3,
                temperature=0.7,
                frequency_penalty=1.5,
                max_tokens=512,
            ),
        )
    ]
)
Llamafile speaks the OpenAI protocol, so the model needs the "openai/" prefix and api_base has to be "http://192.168.1.15:8080/v1/" (note the /v1/ suffix).
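Before involving paper-qa at all, the endpoint and key can be sanity-checked with the plain OpenAI client (a sketch; "my-llm-model" mirrors the alias passed via -a at launch):

from openai import OpenAI

# Talk to the llamafile server directly over its OpenAI-compatible API.
client = OpenAI(
    base_url="http://192.168.1.15:8080/v1/",
    api_key="sk-no-key-required",  # any non-empty key avoids the empty Bearer header
)
response = client.chat.completions.create(
    model="my-llm-model",  # the alias passed via -a at launch
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=32,
)
print(response.choices[0].message.content)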
But I still had an OpenAI connection error... I guessed the problem was that the embedding model was not local.
So I used this solution to make it work:
import os
import pickle

from paperqa import Docs, Settings, SparseEmbeddingModel

settings = Settings(
    llm="openai/my-llm-model",
    llm_config=local_llm_config,
    summary_llm="openai/my-llm-model",
    summary_llm_config=local_llm_config,
)

embedding_model = SparseEmbeddingModel(ndim=256)
docs = Docs()
file_list = os.listdir("./Papers")
for doc in file_list:
    try:
        docs.add(
            "./Papers/" + doc,
            citation="File " + doc,
            docname=doc,
            settings=settings,
            embedding_model=embedding_model,
        )
    except ValueError as e:
        # sometimes this happens if PDFs aren't downloaded or readable
        print("Could not read", doc, e)

with open("./Docs/my_docs.pkl", "wb") as f:
    pickle.dump(docs, f)

answer = docs.query(
    "Quels sont les exemples de nécropoles mérovingiennes dans le Nord Pas de Calais?",
    settings=settings,
    embedding_model=embedding_model,
)
print(answer)
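In a later session, the pickled index can be loaded back instead of re-adding every PDF (a sketch; since SparseEmbeddingModel is deterministic, a model rebuilt with the same ndim embeds queries consistently):

import pickle

# Reload the previously pickled Docs index instead of re-indexing ./Papers.
with open("./Docs/my_docs.pkl", "rb") as f:
    docs = pickle.load(f)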
It works, but the sparse embedding model's accuracy is not optimal... Is there a way to pass a local embedding model directly in Settings, the same way as the llm and summary_llm models?
Can my local LLM be used as the embedding model?
Best regards.
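A follow-up note on the last question: since the llamafile was started with --embedding, the same server also exposes an OpenAI-compatible /v1/embeddings endpoint, which can be exercised directly through LiteLLM (a sketch; wiring it into paper-qa via the Settings embedding/embedding_config fields may depend on the paper-qa version):

import litellm

# The llamafile was launched with --embedding, so it also serves
# OpenAI-compatible embeddings; this exercises that endpoint via LiteLLM.
response = litellm.embedding(
    model="openai/my-llm-model",
    input=["test passage"],
    api_base="http://192.168.1.15:8080/v1/",
    api_key="sk-no-key-required",
)
print(len(response.data[0]["embedding"]))  # reported embedding dimension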