Describe the bug
I deployed TheBloke/Mixtral-8x7B-v0.1-GPTQ using VLLM backend.
I get an error when I run the `openllm query` command.
However, when I make the API call to http://localhost:3000/v1/generate directly, it works fine.
I also tried the LlamaIndex OpenLLM wrapper and got the same error.
To reproduce
1 - openllm start TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ --dtype float16 --max-model-len 4096 --gpu-memory-utilization 1
2 - openllm query hello
Expected behavior
The query should return a generated response. Instead, the command fails with the traceback below:
(base) ybrini@localhost:~$ openllm query hello
hello
Traceback (most recent call last):
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 385, in _request
response.raise_for_status()
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/httpx/_models.py", line 759, in raise_for_status
raise HTTPStatusError(message, request=request, response=self)
httpx.HTTPStatusError: Server error '504 No response from server' for url 'http://localhost:3000/v1/metadata'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/504
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 385, in _request
response.raise_for_status()
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/httpx/_models.py", line 759, in raise_for_status
raise HTTPStatusError(message, request=request, response=self)
httpx.HTTPStatusError: Server error '504 No response from server' for url 'http://localhost:3000/v1/metadata'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/504
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 385, in _request
response.raise_for_status()
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/httpx/_models.py", line 759, in raise_for_status
raise HTTPStatusError(message, request=request, response=self)
httpx.HTTPStatusError: Server error '504 No response from server' for url 'http://localhost:3000/v1/metadata'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/504
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ybrini/miniconda3/bin/openllm", line 8, in
sys.exit(cli())
^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/core.py", line 1157, in call
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_cli/entrypoint.py", line 160, in wrapper
return_value = func(*args, **attrs)
^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_cli/entrypoint.py", line 141, in wrapper
return f(*args, **attrs)
^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/click/decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_cli/entrypoint.py", line 1089, in query_command
for it in stream_res:
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_http.py", line 96, in generate_stream
for response_chunk in self.generate_iterator(prompt, llm_config, stop, adapter_name, timeout, verify, **attrs):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_http.py", line 107, in generate_iterator
llm_config = {**self._config, **attrs}
^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_http.py", line 66, in _config
self.__config = self._metadata.configuration
^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_http.py", line 60, in _metadata
self.__metadata = self._post(path, response_cls=Metadata, json={}, options={'max_retries': self._max_retries})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 445, in _post
return self.request(response_cls, RequestOptions(method='POST', url=path, json=json, **options), stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 369, in request
return self._request(response_cls=response_cls, options=options, remaining_retries=remaining_retries, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 388, in _request
return self._retry_request(response_cls, options, retries, exc.response.headers, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 418, in _retry_request
return self._request(response_cls, options, remaining, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 388, in _request
return self._retry_request(response_cls, options, retries, exc.response.headers, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 418, in _retry_request
return self._request(response_cls, options, remaining, stream=stream, stream_cls=stream_cls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ybrini/miniconda3/lib/python3.11/site-packages/openllm_client/_shim.py", line 391, in _request
raise ValueError(exc.message) from None
^^^^^^^^^^^
AttributeError: 'HTTPStatusError' object has no attribute 'message'
Environment
(environment details not provided)