Testset generation issue with Ollama: gets stuck at "Generating common concepts"
[X] I have checked the documentation and related resources and couldn't resolve my bug.
Describe the bug
I am unable to create a test dataset using Ollama models; generation gets stuck at the "Generating common concepts" step and times out.
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_ollama import ChatOllama, OllamaEmbeddings
from ragas.testset import TestsetGenerator

generator_llm = LangchainLLMWrapper(ChatOllama(model="qwen2.5:32b"))
generator_embeddings = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="llama3.2:3b-instruct-q4_1"))
generator = TestsetGenerator(llm=generator_llm)
dataset = generator.generate_with_langchain_docs(documents, testset_size=10, transforms_embedding_model=generator_embeddings)
Ragas version: 0.2.2
Python version: 3.11.5
Code to Reproduce: see the snippet above.
Error trace
{
"name": "ResponseError",
"message": "do encode request: Post \"http://127.0.0.1:34247/tokenize\": EOF",
"stack": "---------------------------------------------------------------------------
ResponseError Traceback (most recent call last)
Cell In[28], line 4
1 from ragas.testset import TestsetGenerator
3 generator = TestsetGenerator(llm=generator_llm)
----> 4 dataset = generator.generate_with_langchain_docs(documents, testset_size=10,transforms_embedding_model=generator_embeddings)
File ~/miniconda3/lib/python3.11/site-packages/ragas/testset/synthesizers/generate.py:109, in TestsetGenerator.generate_with_langchain_docs(self, documents, testset_size, transforms, transforms_llm, transforms_embedding_model, query_distribution, run_config, callbacks, with_debugging_logs, raise_exceptions)
106 apply_transforms(kg, transforms)
107 self.knowledge_graph = kg
--> 109 return self.generate(
110 testset_size=testset_size,
111 query_distribution=query_distribution,
112 run_config=run_config,
113 callbacks=callbacks,
114 with_debugging_logs=with_debugging_logs,
115 raise_exceptions=raise_exceptions,
116 )
File ~/miniconda3/lib/python3.11/site-packages/ragas/testset/synthesizers/generate.py:203, in TestsetGenerator.generate(self, testset_size, query_distribution, run_config, callbacks, with_debugging_logs, raise_exceptions)
200 for i, (scenario, _) in enumerate(query_distribution):
201 exec.submit(scenario.generate_scenarios, splits[i], self.knowledge_graph)
--> 203 scenario_sample_list: t.List[t.List[BaseScenario]] = exec.results()
204 scenario_generation_rm.on_chain_end(
205 outputs={\"scenario_sample_list\": scenario_sample_list}
206 )
208 # new group for Generation of Samples
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:146, in Executor.results(self)
142 results.append(r)
144 return results
--> 146 results = asyncio.run(_aresults())
147 sorted_results = sorted(results, key=lambda x: x[0])
148 return [r[1] for r in sorted_results]
File ~/miniconda3/lib/python3.11/site-packages/nest_asyncio.py:30, in _patch_asyncio.<locals>.run(main, debug)
28 task = asyncio.ensure_future(main)
29 try:
---> 30 return loop.run_until_complete(task)
31 finally:
32 if not task.done():
File ~/miniconda3/lib/python3.11/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
95 if not f.done():
96 raise RuntimeError(
97 'Event loop stopped before Future completed.')
---> 98 return f.result()
File ~/miniconda3/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(***failed resolving arguments***)
263 try:
264 if exc is None:
265 # We use the `send` method directly, because coroutines
266 # don't have `__iter__` and `__next__` methods.
--> 267 result = coro.send(None)
268 else:
269 result = coro.throw(exc)
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:141, in Executor.results.<locals>._aresults()
132 results = []
133 for future in tqdm(
134 await futures_as_they_finish,
135 desc=self.desc,
(...)
139 disable=not self.show_progress,
140 ):
--> 141 r = await future
142 results.append(r)
144 return results
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:605, in as_completed.<locals>._wait_for_one()
602 if f is None:
603 # Dummy value from _on_timeout().
604 raise exceptions.TimeoutError
--> 605 return f.result()
File ~/miniconda3/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(***failed resolving arguments***)
263 try:
264 if exc is None:
265 # We use the `send` method directly, because coroutines
266 # don't have `__iter__` and `__next__` methods.
--> 267 result = coro.send(None)
268 else:
269 result = coro.throw(exc)
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:36, in as_completed.<locals>.sema_coro(coro)
34 async def sema_coro(coro):
35 async with semaphore:
---> 36 return await coro
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:81, in Executor.wrap_callable_with_index.<locals>.wrapped_callable_async(*args, **kwargs)
79 except Exception as e:
80 if self.raise_exceptions:
---> 81 raise e
82 else:
83 exec_name = type(e).__name__
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:78, in Executor.wrap_callable_with_index.<locals>.wrapped_callable_async(*args, **kwargs)
76 result = np.nan
77 try:
---> 78 result = await callable(*args, **kwargs)
79 except Exception as e:
80 if self.raise_exceptions:
File ~/miniconda3/lib/python3.11/site-packages/ragas/testset/synthesizers/base.py:89, in BaseSynthesizer.generate_scenarios(self, n, knowledge_graph, callbacks)
83 callbacks = callbacks or []
84 scenario_generation_rm, scenario_generation_group = new_group(
85 name=self.name,
86 inputs={\"n\": n, \"knowledge_graph\": str(knowledge_graph)},
87 callbacks=callbacks,
88 )
---> 89 scenarios = await self._generate_scenarios(
90 n, knowledge_graph, scenario_generation_group
91 )
92 scenario_generation_rm.on_chain_end(outputs={\"scenarios\": scenarios})
93 return scenarios
File ~/miniconda3/lib/python3.11/site-packages/ragas/testset/synthesizers/abstract_query.py:263, in ComparativeAbstractQuerySynthesizer._generate_scenarios(self, n, knowledge_graph, callbacks)
250 keyphrases.extend(keyphrases_node)
252 kw_list.append(
253 {
254 \"data\": KeyphrasesAndNumConcepts(
(...)
260 }
261 )
--> 263 common_concepts: t.List[Concepts] = run_async_batch(
264 desc=\"Generating common_concepts\",
265 func=self.common_concepts_prompt.generate,
266 kwargs_list=kw_list,
267 )
269 # sample everything n times
270 for cluster, common_concept in zip(node_clusters, common_concepts):
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:167, in run_async_batch(desc, func, kwargs_list)
164 for kwargs in kwargs_list:
165 executor.submit(func, **kwargs)
--> 167 return executor.results()
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:146, in Executor.results(self)
142 results.append(r)
144 return results
--> 146 results = asyncio.run(_aresults())
147 sorted_results = sorted(results, key=lambda x: x[0])
148 return [r[1] for r in sorted_results]
File ~/miniconda3/lib/python3.11/site-packages/nest_asyncio.py:30, in _patch_asyncio.<locals>.run(main, debug)
28 task = asyncio.ensure_future(main)
29 try:
---> 30 return loop.run_until_complete(task)
31 finally:
32 if not task.done():
File ~/miniconda3/lib/python3.11/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
95 if not f.done():
96 raise RuntimeError(
97 'Event loop stopped before Future completed.')
---> 98 return f.result()
File ~/miniconda3/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(***failed resolving arguments***)
263 try:
264 if exc is None:
265 # We use the `send` method directly, because coroutines
266 # don't have `__iter__` and `__next__` methods.
--> 267 result = coro.send(None)
268 else:
269 result = coro.throw(exc)
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:141, in Executor.results.<locals>._aresults()
132 results = []
133 for future in tqdm(
134 await futures_as_they_finish,
135 desc=self.desc,
(...)
139 disable=not self.show_progress,
140 ):
--> 141 r = await future
142 results.append(r)
144 return results
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:605, in as_completed.<locals>._wait_for_one()
602 if f is None:
603 # Dummy value from _on_timeout().
604 raise exceptions.TimeoutError
--> 605 return f.result()
File ~/miniconda3/lib/python3.11/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(***failed resolving arguments***)
263 try:
264 if exc is None:
265 # We use the `send` method directly, because coroutines
266 # don't have `__iter__` and `__next__` methods.
--> 267 result = coro.send(None)
268 else:
269 result = coro.throw(exc)
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:36, in as_completed.<locals>.sema_coro(coro)
34 async def sema_coro(coro):
35 async with semaphore:
---> 36 return await coro
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:81, in Executor.wrap_callable_with_index.<locals>.wrapped_callable_async(*args, **kwargs)
79 except Exception as e:
80 if self.raise_exceptions:
---> 81 raise e
82 else:
83 exec_name = type(e).__name__
File ~/miniconda3/lib/python3.11/site-packages/ragas/executor.py:78, in Executor.wrap_callable_with_index.<locals>.wrapped_callable_async(*args, **kwargs)
76 result = np.nan
77 try:
---> 78 result = await callable(*args, **kwargs)
79 except Exception as e:
80 if self.raise_exceptions:
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:127, in PydanticPrompt.generate(self, llm, data, temperature, stop, callbacks)
124 callbacks = callbacks or []
126 # this is just a special case of generate_multiple
--> 127 output_single = await self.generate_multiple(
128 llm=llm,
129 data=data,
130 n=1,
131 temperature=temperature,
132 stop=stop,
133 callbacks=callbacks,
134 )
135 return output_single[0]
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:196, in PydanticPrompt.generate_multiple(self, llm, data, n, temperature, stop, callbacks)
194 output_string = resp.generations[0][i].text
195 try:
--> 196 answer = await parser.parse_output_string(
197 output_string=output_string,
198 prompt_value=prompt_value,
199 llm=llm,
200 callbacks=prompt_cb,
201 max_retries=3,
202 )
203 processed_output = self.process_output(answer, data) # type: ignore
204 output_models.append(processed_output)
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:406, in RagasOutputParser.parse_output_string(self, output_string, prompt_value, llm, callbacks, max_retries)
400 if max_retries != 0:
401 retry_rm, retry_cb = new_group(
402 name=\"fix_output_format\",
403 inputs={\"output_string\": output_string},
404 callbacks=callbacks,
405 )
--> 406 fixed_output_string = await fix_output_format_prompt.generate(
407 llm=llm,
408 data=OutputStringAndPrompt(
409 output_string=output_string,
410 prompt_value=prompt_value.to_string(),
411 ),
412 callbacks=retry_cb,
413 )
414 retry_rm.on_chain_end({\"fixed_output_string\": fixed_output_string})
415 return await self.parse_output_string(
416 output_string=fixed_output_string.text,
417 prompt_value=prompt_value,
(...)
420 callbacks=callbacks,
421 )
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:127, in PydanticPrompt.generate(self, llm, data, temperature, stop, callbacks)
124 callbacks = callbacks or []
126 # this is just a special case of generate_multiple
--> 127 output_single = await self.generate_multiple(
128 llm=llm,
129 data=data,
130 n=1,
131 temperature=temperature,
132 stop=stop,
133 callbacks=callbacks,
134 )
135 return output_single[0]
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:196, in PydanticPrompt.generate_multiple(self, llm, data, n, temperature, stop, callbacks)
194 output_string = resp.generations[0][i].text
195 try:
--> 196 answer = await parser.parse_output_string(
197 output_string=output_string,
198 prompt_value=prompt_value,
199 llm=llm,
200 callbacks=prompt_cb,
201 max_retries=3,
202 )
203 processed_output = self.process_output(answer, data) # type: ignore
204 output_models.append(processed_output)
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:406, in RagasOutputParser.parse_output_string(self, output_string, prompt_value, llm, callbacks, max_retries)
400 if max_retries != 0:
401 retry_rm, retry_cb = new_group(
402 name=\"fix_output_format\",
403 inputs={\"output_string\": output_string},
404 callbacks=callbacks,
405 )
--> 406 fixed_output_string = await fix_output_format_prompt.generate(
407 llm=llm,
408 data=OutputStringAndPrompt(
409 output_string=output_string,
410 prompt_value=prompt_value.to_string(),
411 ),
412 callbacks=retry_cb,
413 )
414 retry_rm.on_chain_end({\"fixed_output_string\": fixed_output_string})
415 return await self.parse_output_string(
416 output_string=fixed_output_string.text,
417 prompt_value=prompt_value,
(...)
420 callbacks=callbacks,
421 )
[... skipping similar frames: PydanticPrompt.generate at line 127 (13 times), PydanticPrompt.generate_multiple at line 196 (12 times), RagasOutputParser.parse_output_string at line 406 (12 times)]
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:196, in PydanticPrompt.generate_multiple(self, llm, data, n, temperature, stop, callbacks)
194 output_string = resp.generations[0][i].text
195 try:
--> 196 answer = await parser.parse_output_string(
197 output_string=output_string,
198 prompt_value=prompt_value,
199 llm=llm,
200 callbacks=prompt_cb,
201 max_retries=3,
202 )
203 processed_output = self.process_output(answer, data) # type: ignore
204 output_models.append(processed_output)
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:406, in RagasOutputParser.parse_output_string(self, output_string, prompt_value, llm, callbacks, max_retries)
400 if max_retries != 0:
401 retry_rm, retry_cb = new_group(
402 name=\"fix_output_format\",
403 inputs={\"output_string\": output_string},
404 callbacks=callbacks,
405 )
--> 406 fixed_output_string = await fix_output_format_prompt.generate(
407 llm=llm,
408 data=OutputStringAndPrompt(
409 output_string=output_string,
410 prompt_value=prompt_value.to_string(),
411 ),
412 callbacks=retry_cb,
413 )
414 retry_rm.on_chain_end({\"fixed_output_string\": fixed_output_string})
415 return await self.parse_output_string(
416 output_string=fixed_output_string.text,
417 prompt_value=prompt_value,
(...)
420 callbacks=callbacks,
421 )
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:127, in PydanticPrompt.generate(self, llm, data, temperature, stop, callbacks)
124 callbacks = callbacks or []
126 # this is just a special case of generate_multiple
--> 127 output_single = await self.generate_multiple(
128 llm=llm,
129 data=data,
130 n=1,
131 temperature=temperature,
132 stop=stop,
133 callbacks=callbacks,
134 )
135 return output_single[0]
File ~/miniconda3/lib/python3.11/site-packages/ragas/prompt/pydantic_prompt.py:183, in PydanticPrompt.generate_multiple(self, llm, data, n, temperature, stop, callbacks)
176 prompt_rm, prompt_cb = new_group(
177 name=self.name,
178 inputs={\"data\": processed_data},
179 callbacks=callbacks,
180 metadata={\"type\": ChainType.RAGAS_PROMPT},
181 )
182 prompt_value = PromptValue(text=self.to_string(processed_data))
--> 183 resp = await llm.generate(
184 prompt_value,
185 n=n,
186 temperature=temperature,
187 stop=stop,
188 callbacks=prompt_cb,
189 )
191 output_models = []
192 parser = RagasOutputParser(pydantic_object=self.output_model)
File ~/miniconda3/lib/python3.11/site-packages/ragas/llms/base.py:100, in BaseRagasLLM.generate(self, prompt, n, temperature, stop, callbacks)
95 temperature = self.get_temperature(n)
97 agenerate_text_with_retry = add_async_retry(
98 self.agenerate_text, self.run_config
99 )
--> 100 result = await agenerate_text_with_retry(
101 prompt=prompt,
102 n=n,
103 temperature=temperature,
104 stop=stop,
105 callbacks=callbacks,
106 )
108 # check there are no max_token issues
109 if not self.is_finished(result):
File ~/miniconda3/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:189, in AsyncRetrying.wraps.<locals>.async_wrapped(*args, **kwargs)
187 copy = self.copy()
188 async_wrapped.statistics = copy.statistics # type: ignore[attr-defined]
--> 189 return await copy(fn, *args, **kwargs)
File ~/miniconda3/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:111, in AsyncRetrying.__call__(self, fn, *args, **kwargs)
109 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
110 while True:
--> 111 do = await self.iter(retry_state=retry_state)
112 if isinstance(do, DoAttempt):
113 try:
File ~/miniconda3/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:153, in AsyncRetrying.iter(self, retry_state)
151 result = None
152 for action in self.iter_state.actions:
--> 153 result = await action(retry_state)
154 return result
File ~/miniconda3/lib/python3.11/site-packages/tenacity/_utils.py:99, in wrap_to_async_func.<locals>.inner(*args, **kwargs)
98 async def inner(*args: typing.Any, **kwargs: typing.Any) -> typing.Any:
---> 99 return call(*args, **kwargs)
File ~/miniconda3/lib/python3.11/site-packages/tenacity/__init__.py:418, in BaseRetrying._post_stop_check_actions.<locals>.exc_check(rs)
416 retry_exc = self.retry_error_cls(fut)
417 if self.reraise:
--> 418 raise retry_exc.reraise()
419 raise retry_exc from fut.exception()
File ~/miniconda3/lib/python3.11/site-packages/tenacity/__init__.py:185, in RetryError.reraise(self)
183 def reraise(self) -> t.NoReturn:
184 if self.last_attempt.failed:
--> 185 raise self.last_attempt.result()
186 raise self
File ~/miniconda3/lib/python3.11/concurrent/futures/_base.py:449, in Future.result(self, timeout)
447 raise CancelledError()
448 elif self._state == FINISHED:
--> 449 return self.__get_result()
451 self._condition.wait(timeout)
453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File ~/miniconda3/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self)
399 if self._exception:
400 try:
--> 401 raise self._exception
402 finally:
403 # Break a reference cycle with the exception in self._exception
404 self = None
File ~/miniconda3/lib/python3.11/site-packages/tenacity/asyncio/__init__.py:114, in AsyncRetrying.__call__(self, fn, *args, **kwargs)
112 if isinstance(do, DoAttempt):
113 try:
--> 114 result = await fn(*args, **kwargs)
115 except BaseException: # noqa: B902
116 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]
File ~/miniconda3/lib/python3.11/site-packages/ragas/llms/base.py:228, in LangchainLLMWrapper.agenerate_text(self, prompt, n, temperature, stop, callbacks)
220 return await self.langchain_llm.agenerate_prompt(
221 prompts=[prompt],
222 n=n,
(...)
225 callbacks=callbacks,
226 )
227 else:
--> 228 result = await self.langchain_llm.agenerate_prompt(
229 prompts=[prompt] * n,
230 temperature=temperature,
231 stop=stop,
232 callbacks=callbacks,
233 )
234 # make LLMResult.generation appear as if it was n_completions
235 # note that LLMResult.runs is still a list that represents each run
236 generations = [[g[0] for g in result.generations]]
File ~/miniconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:796, in BaseChatModel.agenerate_prompt(self, prompts, stop, callbacks, **kwargs)
788 async def agenerate_prompt(
789 self,
790 prompts: list[PromptValue],
(...)
793 **kwargs: Any,
794 ) -> LLMResult:
795 prompt_messages = [p.to_messages() for p in prompts]
--> 796 return await self.agenerate(
797 prompt_messages, stop=stop, callbacks=callbacks, **kwargs
798 )
File ~/miniconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:756, in BaseChatModel.agenerate(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
743 if run_managers:
744 await asyncio.gather(
745 *[
746 run_manager.on_llm_end(
(...)
754 ]
755 )
--> 756 raise exceptions[0]
757 flattened_outputs = [
758 LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[list-item, union-attr]
759 for res in results
760 ]
761 llm_output = self._combine_llm_outputs([res.llm_output for res in results]) # type: ignore[union-attr]
File ~/miniconda3/lib/python3.11/asyncio/tasks.py:267, in Task.__step(***failed resolving arguments***)
263 try:
264 if exc is None:
265 # We use the `send` method directly, because coroutines
266 # don't have `__iter__` and `__next__` methods.
--> 267 result = coro.send(None)
268 else:
269 result = coro.throw(exc)
File ~/miniconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:924, in BaseChatModel._agenerate_with_cache(self, messages, stop, run_manager, **kwargs)
922 else:
923 if inspect.signature(self._agenerate).parameters.get(\"run_manager\"):
--> 924 result = await self._agenerate(
925 messages, stop=stop, run_manager=run_manager, **kwargs
926 )
927 else:
928 result = await self._agenerate(messages, stop=stop, **kwargs)
File ~/miniconda3/lib/python3.11/site-packages/langchain_ollama/chat_models.py:731, in ChatOllama._agenerate(self, messages, stop, run_manager, **kwargs)
724 async def _agenerate(
725 self,
726 messages: List[BaseMessage],
(...)
729 **kwargs: Any,
730 ) -> ChatResult:
--> 731 final_chunk = await self._achat_stream_with_aggregation(
732 messages, stop, run_manager, verbose=self.verbose, **kwargs
733 )
734 generation_info = final_chunk.generation_info
735 chat_generation = ChatGeneration(
736 message=AIMessage(
737 content=final_chunk.text,
(...)
741 generation_info=generation_info,
742 )
File ~/miniconda3/lib/python3.11/site-packages/langchain_ollama/chat_models.py:588, in ChatOllama._achat_stream_with_aggregation(self, messages, stop, run_manager, verbose, **kwargs)
579 async def _achat_stream_with_aggregation(
580 self,
581 messages: List[BaseMessage],
(...)
585 **kwargs: Any,
586 ) -> ChatGenerationChunk:
587 final_chunk = None
--> 588 async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
589 if not isinstance(stream_resp, str):
590 chunk = ChatGenerationChunk(
591 message=AIMessageChunk(
592 content=(
(...)
605 ),
606 )
File ~/miniconda3/lib/python3.11/site-packages/langchain_ollama/chat_models.py:489, in ChatOllama._acreate_chat_stream(self, messages, stop, **kwargs)
479 yield await self._async_client.chat(
480 model=params[\"model\"],
481 messages=ollama_messages,
(...)
486 tools=kwargs[\"tools\"],
487 ) # type:ignore
488 else:
--> 489 async for part in await self._async_client.chat(
490 model=params[\"model\"],
491 messages=ollama_messages,
492 stream=True,
493 options=Options(**params[\"options\"]),
494 keep_alive=params[\"keep_alive\"],
495 format=params[\"format\"],
496 ): # type:ignore
497 yield part
File ~/miniconda3/lib/python3.11/site-packages/ollama/_client.py:499, in AsyncClient._stream.<locals>.inner()
497 except httpx.HTTPStatusError as e:
498 await e.response.aread()
--> 499 raise ResponseError(e.response.text, e.response.status_code) from None
501 async for line in r.aiter_lines():
502 partial = json.loads(line)
ResponseError: do encode request: Post \"http://127.0.0.1:34247/tokenize\": EOF"
}
Expected behavior
Testset generation should complete and return the requested 10 samples instead of hanging at "Generating common concepts".
Additional context
I have also tried other embedding models, like nomic, with the same result.
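In case request timeouts are part of the problem, here is a minimal sketch of the same call with an explicit RunConfig to raise the per-request timeout and limit concurrency against the local Ollama server (the field values are illustrative only, and this may not address the underlying tokenize error):

# Sketch: same generation call, but with a more generous RunConfig.
# Timeout/retry/worker values below are illustrative, not recommendations.
from ragas import RunConfig

run_config = RunConfig(timeout=600, max_retries=5, max_workers=4)
dataset = generator.generate_with_langchain_docs(
    documents,
    testset_size=10,
    transforms_embedding_model=generator_embeddings,
    run_config=run_config,
)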
This is a duplicate of https://github.com/explodinggradients/ragas/issues/1170. Sadly, we don't support Ollama models right now, and we are working to fix this issue.
Do keep an eye on the root issue for updates.
@jjmachan, is this an Ollama-specific issue, and is there a way to use local LLMs through other methods, like vLLM or Hugging Face, for test set generation?
@rajuptvs vLLM works really well. Just refer to https://docs.vllm.ai/en/v0.6.1/serving/serving_with_langchain.html to create the LangChain LLM object, and you can pass it through to the llm argument.
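For example, something along these lines should work (a sketch that assumes a vLLM server exposing the OpenAI-compatible API at http://localhost:8000/v1; the model id is a placeholder):

# Sketch: point a LangChain chat model at a locally running vLLM server
# (OpenAI-compatible endpoint) and pass it to ragas. The model id and URL
# below are placeholders for whatever you serve with vLLM.
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator

vllm_chat = ChatOpenAI(
    model="Qwen/Qwen2.5-32B-Instruct",    # placeholder model id
    base_url="http://localhost:8000/v1",  # vLLM's OpenAI-compatible endpoint
    api_key="EMPTY",                      # vLLM does not check the key by default
    temperature=0,
)

generator_llm = LangchainLLMWrapper(vllm_chat)
generator = TestsetGenerator(llm=generator_llm)
# then: generator.generate_with_langchain_docs(documents, testset_size=10, ...)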
@jjmachan, how do I create an embedding object with vLLM? Or is it not necessary?
Did anyone solve it?