redis-ai-resources/python-recipes/semantic-cache/01_doc2cache_llama3_1.ipynb ERROR #2
Hi,

When running the following cell:

```python
faqs = doc2cache.invoke({"doc": sample_doc})
```

I get the following output:

```
16:17:24 openai._base_client INFO Retrying request to /completions in 0.433632 seconds
16:17:25 openai._base_client INFO Retrying request to /completions in 0.830750 seconds

RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:101, in map_httpcore_exceptions()
    100 try:
--> 101     yield
    102 except Exception as exc:

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:250, in HTTPTransport.handle_request(self, request)
    249 with map_httpcore_exceptions():
--> 250     resp = self._pool.handle_request(req)
    252 assert isinstance(resp.stream, typing.Iterable)

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:256, in ConnectionPool.handle_request(self, request)
    255     self._close_connections(closing)
--> 256     raise exc from None
    258 # Return the response. Note that in this case we still have to manage
    259 # the point at which the response is closed.

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:236, in ConnectionPool.handle_request(self, request)
    234 try:
    235     # Send the request on the assigned connection.
--> 236     response = connection.handle_request(
    237         pool_request.request
    238     )
    239 except ConnectionNotAvailable:
    240     # In some cases a connection may initially be available to
    241     # handle a request, but then become unavailable.
    242     #
    243     # In this case we clear the connection and try again.

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection.py:103, in HTTPConnection.handle_request(self, request)
    101     raise exc
--> 103 return self._connection.handle_request(request)

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:136, in HTTP11Connection.handle_request(self, request)
    135     self._response_closed()
--> 136 raise exc

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:106, in HTTP11Connection.handle_request(self, request)
     97 with Trace(
     98     "receive_response_headers", logger, request, kwargs
     99 ) as trace:
    100     (
    101         http_version,
    102         status,
    103         reason_phrase,
    104         headers,
    105         trailing_data,
--> 106     ) = self._receive_response_headers(**kwargs)
    107     trace.return_value = (
    108         http_version,
    109         status,
    110         reason_phrase,
    111         headers,
    112     )

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
    176 while True:
--> 177     event = self._receive_event(timeout=timeout)
    178     if isinstance(event, h11.Response):

File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:231, in HTTP11Connection._receive_event(self, timeout)
    230     msg = "Server disconnected without sending a response."
--> 231     raise RemoteProtocolError(msg)
    233 self._h11_state.receive_data(data)

RemoteProtocolError: Server disconnected without sending a response.

The above exception was the direct cause of the following exception:
RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:989, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
    988 try:
--> 989     response = self._client.send(
    990         request,
    991         stream=stream or self._should_stream_response_body(request=request),
    992         **kwargs,
    993     )
    994 except httpx.TimeoutException as err:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
    912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
    915     request,
    916     auth=auth,
    917     follow_redirects=follow_redirects,
    918     history=[],
    919 )
    920 try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
    941 while True:
--> 942     response = self._send_handling_redirects(
    943         request,
    944         follow_redirects=follow_redirects,
    945         history=history,
    946     )
    947     try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
    977     hook(request)
--> 979 response = self._send_single_request(request)
    980 try:

File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:1014, in Client._send_single_request(self, request)
   1013 with request_context(request=request):
-> 1014     response = transport.handle_request(request)
   1016 assert isinstance(response.stream, SyncByteStream)

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:249, in HTTPTransport.handle_request(self, request)
    237 req = httpcore.Request(
    238     method=request.method,
    239     url=httpcore.URL(
    (...)
    247     extensions=request.extensions,
    248 )
--> 249 with map_httpcore_exceptions():
    250     resp = self._pool.handle_request(req)

File C:\Program Files\Python313\Lib\contextlib.py:162, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    161 try:
--> 162     self.gen.throw(value)
    163 except StopIteration as exc:
    164     # Suppress StopIteration unless it's the same exception that
    165     # was passed to throw(). This prevents a StopIteration
    166     # raised inside the "with" statement from being suppressed.

File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:118, in map_httpcore_exceptions()
    117 message = str(exc)
--> 118 raise mapped_exc(message) from exc

RemoteProtocolError: Server disconnected without sending a response.

The above exception was the direct cause of the following exception:
APIConnectionError                        Traceback (most recent call last)
Cell In[11], line 1
----> 1 faqs = doc2cache.invoke({"doc": sample_doc})

File ~\litellm-qa-env\Lib\site-packages\langchain_core\runnables\base.py:3034, in RunnableSequence.invoke(self, input, config, **kwargs)
   3032     input = context.run(step.invoke, input, config, **kwargs)
   3033 else:
-> 3034     input = context.run(step.invoke, input, config)
   3035 # finish the root run
   3036 except BaseException as e:

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:387, in BaseLLM.invoke(self, input, config, stop, **kwargs)
    376 @override
    377 def invoke(
    378     self,
    (...)
    383     **kwargs: Any,
    384 ) -> str:
    385     config = ensure_config(config)
    386     return (
--> 387         self.generate_prompt(
    388             [self._convert_input(input)],
    389             stop=stop,
    390             callbacks=config.get("callbacks"),
    391             tags=config.get("tags"),
    392             metadata=config.get("metadata"),
    393             run_name=config.get("run_name"),
    394             run_id=config.pop("run_id", None),
    395             **kwargs,
    396         )
    397         .generations[0][0]
    398         .text
    399     )

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:764, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
    755 @override
    756 def generate_prompt(
    757     self,
    (...)
    761     **kwargs: Any,
    762 ) -> LLMResult:
    763     prompt_strings = [p.to_string() for p in prompts]
--> 764     return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:971, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
    956 if (self.cache is None and get_llm_cache() is None) or self.cache is False:
    957     run_managers = [
    958         callback_manager.on_llm_start(
    959             self._serialized,
    (...)
    969         )
    970     ]
--> 971     return self._generate_helper(
    972         prompts,
    973         stop,
    974         run_managers,
    975         new_arg_supported=bool(new_arg_supported),
    976         **kwargs,
    977     )
    978 if len(missing_prompts) > 0:
    979     run_managers = [
    980         callback_managers[idx].on_llm_start(
    981             self._serialized,
    (...)
    988         for idx in missing_prompt_idxs
    989     ]

File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:790, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
    779 def _generate_helper(
    780     self,
    781     prompts: list[str],
    (...)
    786     **kwargs: Any,
    787 ) -> LLMResult:
    788     try:
    789         output = (
--> 790             self._generate(
    791                 prompts,
    792                 stop=stop,
    793                 # TODO: support multiple run managers
    794                 run_manager=run_managers[0] if run_managers else None,
    795                 **kwargs,
    796             )
    797             if new_arg_supported
    798             else self._generate(prompts, stop=stop)
    799         )
    800     except BaseException as e:
    801         for run_manager in run_managers:

File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:463, in BaseOpenAI._generate(self, prompts, stop, run_manager, **kwargs)
    451     choices.append(
    452         {
    453             "text": generation.text,
    (...)
    460         }
    461     )
    462 else:
--> 463     response = completion_with_retry(
    464         self, prompt=_prompts, run_manager=run_manager, **params
    465     )
    466     if not isinstance(response, dict):
    467         # V1 client returns the response in an PyDantic object instead of
    468         # dict. For the transition period, we deep convert it to dict.
    469         response = response.dict()

File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:121, in completion_with_retry(llm, run_manager, **kwargs)
    119 """Use tenacity to retry the completion call."""
    120 if is_openai_v1():
--> 121     return llm.client.create(**kwargs)
    123 retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
    125 @retry_decorator
    126 def _completion_with_retry(**kwargs: Any) -> Any:

File ~\litellm-qa-env\Lib\site-packages\openai\_utils\_utils.py:279, in required_args.

File ~\litellm-qa-env\Lib\site-packages\openai\resources\completions.py:545, in Completions.create(self, model, prompt, best_of, echo, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, seed, stop, stream, stream_options, suffix, temperature, top_p, user, extra_headers, extra_query, extra_body, timeout)
    516 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
    517 def create(
    518     self,
    (...)
    543     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    544 ) -> Completion | Stream[Completion]:
--> 545     return self._post(
    546         "/completions",
    547         body=maybe_transform(
    548             {
    549                 "model": model,
    550                 "prompt": prompt,
    551                 "best_of": best_of,
    552                 "echo": echo,
    553                 "frequency_penalty": frequency_penalty,
    554                 "logit_bias": logit_bias,
    555                 "logprobs": logprobs,
    556                 "max_tokens": max_tokens,
    557                 "n": n,
    558                 "presence_penalty": presence_penalty,
    559                 "seed": seed,
    560                 "stop": stop,
    561                 "stream": stream,
    562                 "stream_options": stream_options,
    563                 "suffix": suffix,
    564                 "temperature": temperature,
    565                 "top_p": top_p,
    566                 "user": user,
    567             },
    568             completion_create_params.CompletionCreateParamsStreaming
    569             if stream
    570             else completion_create_params.CompletionCreateParamsNonStreaming,
    571         ),
    572         options=make_request_options(
    573             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    574         ),
    575         cast_to=Completion,
    576         stream=stream or False,
    577         stream_cls=Stream[Completion],
    578     )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1276, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1262 def post(
   1263     self,
   1264     path: str,
    (...)
   1271     stream_cls: type[_StreamT] | None = None,
   1272 ) -> ResponseT | _StreamT:
   1273     opts = FinalRequestOptions.construct(
   1274         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1275     )
-> 1276     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:949, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    946 else:
    947     retries_taken = 0
--> 949 return self._request(
    950     cast_to=cast_to,
    951     options=options,
    952     stream=stream,
    953     stream_cls=stream_cls,
    954     retries_taken=retries_taken,
    955 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1010 log.debug("Encountered Exception", exc_info=True)
   1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
   1014         input_options,
   1015         cast_to,
   1016         retries_taken=retries_taken,
   1017         stream=stream,
   1018         stream_cls=stream_cls,
   1019         response_headers=None,
   1020     )
   1022 log.debug("Raising connection error")
   1023 raise APIConnectionError(request=request) from err

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
   1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
   1088 # different thread if necessary.
   1089 time.sleep(timeout)
-> 1091 return self._request(
   1092     options=options,
   1093     cast_to=cast_to,
   1094     retries_taken=retries_taken + 1,
   1095     stream=stream,
   1096     stream_cls=stream_cls,
   1097 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1010 log.debug("Encountered Exception", exc_info=True)
   1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
   1014         input_options,
   1015         cast_to,
   1016         retries_taken=retries_taken,
   1017         stream=stream,
   1018         stream_cls=stream_cls,
   1019         response_headers=None,
   1020     )
   1022 log.debug("Raising connection error")
   1023 raise APIConnectionError(request=request) from err

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
   1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
   1088 # different thread if necessary.
   1089 time.sleep(timeout)
-> 1091 return self._request(
   1092     options=options,
   1093     cast_to=cast_to,
   1094     retries_taken=retries_taken + 1,
   1095     stream=stream,
   1096     stream_cls=stream_cls,
   1097 )

File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1013     return self._retry_request(
   1014         input_options,
   1015         cast_to,
    (...)
   1019         response_headers=None,
   1020     )
   1022 log.debug("Raising connection error")
-> 1023 raise APIConnectionError(request=request) from err
   1025 log.debug(
   1026     'HTTP Response: %s %s "%i %s" %s',
   1027     request.method,
    (...)
   1031     response.headers,
   1032 )
   1033 log.debug("request_id: %s", response.headers.get("x-request-id"))

APIConnectionError: Connection error.
```
Using:

```python
llama = VLLMOpenAI(
    openai_api_key="EMPTY",
    openai_api_base="http://localhost:8000/v1",
    model_name=MODEL_NAME,
    temperature=0.1
)
```
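For reference, the same `/completions` endpoint can be exercised directly, without LangChain, to check whether the vLLM server itself is dropping the connection. This is only a minimal sketch: it assumes the `openai` v1 Python client and reuses the base URL, the "EMPTY" key, and the notebook's `MODEL_NAME` from the config above; the prompt and `max_tokens` are placeholders.

```python
# Minimal sanity check against the same OpenAI-compatible vLLM endpoint,
# bypassing LangChain entirely (values mirror the VLLMOpenAI config above).
from openai import OpenAI

client = OpenAI(
    api_key="EMPTY",                      # vLLM does not check the key
    base_url="http://localhost:8000/v1",  # same endpoint as openai_api_base
)

response = client.completions.create(
    model=MODEL_NAME,        # same MODEL_NAME variable used in the notebook
    prompt="Hello, world!",  # placeholder prompt
    max_tokens=32,
    temperature=0.1,
)
print(response.choices[0].text)
```

If this direct call fails with the same connection error, the vLLM server logs should show why the connection is being closed; if it succeeds, the issue is more likely specific to the request the chain builds from `sample_doc` (for example, its size).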