
error: answer_relevancy with Bedrock model

Open tinomaxthayil opened this issue 1 year ago • 4 comments

Describe the bug
I am getting an error when using a Bedrock embedding model with the answer_relevancy metric.

Ragas version: 0.0.21
Python version: 3.10.8

Code to Reproduce

```python
from ragas import evaluate
from ragas.metrics import answer_relevancy
import nest_asyncio  # CHECK NOTES

# NOTES: Only needed when running in a Jupyter notebook; otherwise comment out or remove this call.
nest_asyncio.apply()

result = evaluate(
    amnesty_qa["eval"].select(range(3)),
    metrics=[answer_relevancy],
    llm=bedrock_model,
    embeddings=bedrock_embeddings,
)

result
```
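The snippet above assumes `bedrock_model`, `bedrock_embeddings`, and `amnesty_qa` are already defined. A minimal sketch of that setup, in which the region, model IDs, and dataset config name are assumptions rather than values taken from this issue:

```python
import boto3
from datasets import load_dataset
from langchain_community.chat_models import BedrockChat
from langchain_community.embeddings import BedrockEmbeddings

# Shared Bedrock runtime client (assumed region).
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

# Judge LLM and embedding model (assumed model ids).
bedrock_model = BedrockChat(client=bedrock_client, model_id="anthropic.claude-v2")
bedrock_embeddings = BedrockEmbeddings(
    client=bedrock_client, model_id="amazon.titan-embed-text-v1"
)

# Public evaluation dataset used in the ragas docs (assumed config name).
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english")
```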

Error trace

```
ValidationException                       Traceback (most recent call last)
File /usr/local/lib/python3.10/site-packages/langchain_community/embeddings/bedrock.py:131, in BedrockEmbeddings._embedding_func(self, text)
    129 try:
    130     # invoke bedrock API
--> 131     response = self.client.invoke_model(
    132         body=body,
    133         modelId=self.model_id,
    134         accept="application/json",
    135         contentType="application/json",
    136     )
    138 # format output based on provider

File /usr/local/lib/python3.10/site-packages/botocore/client.py:553, in ClientCreator._create_api_method.<locals>._api_call(self, *args, **kwargs)
    552 # The "self" in this scope is referring to the BaseClient.
--> 553 return self._make_api_call(operation_name, kwargs)

File /usr/local/lib/python3.10/site-packages/botocore/client.py:1009, in BaseClient._make_api_call(self, operation_name, api_params)
   1008     error_class = self.exceptions.from_code(error_code)
-> 1009     raise error_class(parsed_response, operation_name)
   1010 else:

ValidationException: An error occurred (ValidationException) when calling the InvokeModel operation: Malformed input request: expected minLength: 1, actual: 0, please reformat your input and try again.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[42], line 1
----> 1 result = evaluate(
      2     amnesty_qa["eval"].select(range(3)),
      3     metrics=[answer_relevancy],
      4     llm=bedrock_model,
      5     embeddings=bedrock_embeddings,
      6 )

File ~/Projects/ragas/src/ragas/evaluation.py:204, in evaluate(dataset, metrics, llm, embeddings, callbacks, is_async, max_workers, raise_exceptions, column_map)
    201 if not evaluation_group_cm.ended:
    202     evaluation_rm.on_chain_error(e)
--> 204 raise e
    205 else:
    206     result = Result(
    207         scores=Dataset.from_list(scores),
    208         dataset=dataset,
    209         binary_columns=binary_metrics,
    210     )

File ~/Projects/ragas/src/ragas/evaluation.py:187, in evaluate(dataset, metrics, llm, embeddings, callbacks, is_async, max_workers, raise_exceptions, column_map)
    184 scores = []
    185 try:
    186     # get the results
--> 187     results = executor.results()
    188     # convert results to dataset_like
    189     for i, _ in enumerate(dataset):

File ~/Projects/ragas/src/ragas/executor.py:119, in Executor.results(self)
    117     r = (-1, np.nan)
    118 if self.raise_exceptions:
--> 119     raise e
    120 finally:
    121     results.append(r)

File ~/Projects/ragas/src/ragas/executor.py:115, in Executor.results(self)
    113 r = (-1, np.nan)
    114 try:
--> 115     r = future.result()
    116 except Exception as e:
    117     r = (-1, np.nan)

File /usr/local/Cellar/python@3.10/3.10.8/Frameworks/Python.framework/Versions/3.10/lib/python3.10/concurrent/futures/_base.py:451, in Future.result(self, timeout)
    449     raise CancelledError()
    450 elif self._state == FINISHED:
--> 451     return self.__get_result()
    453 self._condition.wait(timeout)
    455 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File /usr/local/Cellar/python@3.10/3.10.8/Frameworks/Python.framework/Versions/3.10/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
    401 if self._exception:
    402     try:
--> 403         raise self._exception
    404     finally:
    405         # Break a reference cycle with the exception in self._exception
    406         self = None

File /usr/local/Cellar/python@3.10/3.10.8/Frameworks/Python.framework/Versions/3.10/lib/python3.10/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File ~/Projects/ragas/src/ragas/executor.py:37, in Executor.wrap_callable_with_index.<locals>.wrapped_callable(*args, **kwargs)
     36 def wrapped_callable(*args, **kwargs):
---> 37     return counter, callable(*args, **kwargs)

File ~/Projects/ragas/src/ragas/metrics/base.py:76, in Metric.score(self, row, callbacks)
     74 if not group_cm.ended:
     75     rm.on_chain_error(e)
---> 76 raise e
     77 else:
     78     if not group_cm.ended:

File ~/Projects/ragas/src/ragas/metrics/base.py:72, in Metric.score(self, row, callbacks)
     68 rm, group_cm = new_group(
     69     self.name, inputs=row, callbacks=callbacks, is_async=False
     70 )
     71 try:
---> 72     score = self._score(row=row, callbacks=group_cm)
     73 except Exception as e:
     74     if not group_cm.ended:

File ~/Projects/ragas/src/ragas/metrics/_answer_relevance.py:133, in AnswerRelevancy._score(self, row, callbacks)
    124 result = self.llm.generate_text(
    125     prompt,
    126     n=self.strictness,
    127     callbacks=callbacks,
    128 )
    129 response = [
    130     json_loader.safe_load(r.text, self.llm) for r in result.generations[0]
    131 ]
--> 133 return self._calculate_score(response, row)

File ~/Projects/ragas/src/ragas/metrics/_answer_relevance.py:111, in AnswerRelevancy._calculate_score(self, response, row)
    101 gen_questions = [
    102     item.get("question", "") for item in response if isinstance(item, dict)
    103 ]
    104 committal = np.any(
    105     [
    106         item.get("noncommittal", False)
    (...)
    109     ]
    110 )
--> 111 cosine_sim = self.calculate_similarity(question, gen_questions)
    112 score = cosine_sim.mean() * int(not committal)
    114 return score

File ~/Projects/ragas/src/ragas/metrics/_answer_relevance.py:87, in AnswerRelevancy.calculate_similarity(self, question, generated_questions)
     84 assert self.embeddings is not None
     85 question_vec = np.asarray(self.embeddings.embed_query(question)).reshape(1, -1)
     86 gen_question_vec = np.asarray(
---> 87     self.embeddings.embed_documents(generated_questions)
     88 )
     89 norm = np.linalg.norm(gen_question_vec, axis=1) * np.linalg.norm(
     90     question_vec, axis=1
     91 )
     92 return (
     93     np.dot(gen_question_vec, question_vec.T).reshape(
     94         -1,
     95     )
     96     / norm
     97 )

File /usr/local/lib/python3.10/site-packages/langchain_community/embeddings/bedrock.py:159, in BedrockEmbeddings.embed_documents(self, texts)
    157 results = []
    158 for text in texts:
--> 159     response = self._embedding_func(text)
    160     results.append(response)
    161 return results

File /usr/local/lib/python3.10/site-packages/langchain_community/embeddings/bedrock.py:146, in BedrockEmbeddings._embedding_func(self, text)
    144     return response_body.get("embedding")
    145 except Exception as e:
--> 146     raise ValueError(f"Error raised by inference endpoint: {e}")

ValueError: Error raised by inference endpoint: An error occurred (ValidationException) when calling the InvokeModel operation: Malformed input request: expected minLength: 1, actual: 0, please reformat your input and try again.
```
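Reading the trace bottom-up points at the likely cause: `AnswerRelevancy._calculate_score` builds `gen_questions` with `item.get("question", "")`, so any LLM generation that `json_loader.safe_load` fails to parse contributes an empty string, and `BedrockEmbeddings.embed_documents` forwards that empty string to `InvokeModel`, which rejects it (`expected minLength: 1, actual: 0`). Until the parsing is hardened upstream, one workaround sketch is to wrap the embeddings so zero-length inputs never reach the endpoint; the class name and placeholder text below are illustrative, not part of ragas or langchain:

```python
from typing import List

from langchain_community.embeddings import BedrockEmbeddings


class NonEmptyBedrockEmbeddings(BedrockEmbeddings):
    """Workaround sketch: Bedrock rejects zero-length inputs, so replace
    any empty string with a one-character placeholder before embedding."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        # "-" is an arbitrary placeholder; it only needs to satisfy minLength: 1.
        patched = [t if t.strip() else "-" for t in texts]
        return super().embed_documents(patched)
```

Passing an instance of this wrapper as `embeddings=` lets the evaluation finish, at the cost of a distorted similarity score for any row whose generated questions failed to parse.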

tinomaxthayil avatar Jan 25 '24 03:01 tinomaxthayil

Same here for Python 3.11.4, ragas 0.0.22.

andriimazur93 avatar Jan 25 '24 14:01 andriimazur93

I'm working with a Bedrock Claude model as well. Can you please share which embedding model you used here? One thing I don't really understand: if we are using a Bedrock model, do we have to use an embedding model from Bedrock? Shouldn't this match the embedding model actually used for inference?

hteeyeoh avatar Jan 29 '24 02:01 hteeyeoh

And also, did you observe that with Bedrock the metric results obtained are often "null"/"nan" for faithfulness and context_precision? `"context_precision":null,"faithfulness":null,"answer_relevancy":0.7777600149`

hteeyeoh avatar Jan 29 '24 02:01 hteeyeoh
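As an aside on those null scores, the trace above shows where they can come from: `Executor.results()` substitutes `(-1, np.nan)` whenever a metric raises and `raise_exceptions` is False, and NaN serializes to `null` in the JSON output. A sketch of that fallback pattern (the function name is illustrative):

```python
import numpy as np
from concurrent.futures import Future


def score_or_nan(future: Future, raise_exceptions: bool = False) -> float:
    """Mirrors the fallback visible in ragas/src/ragas/executor.py: a metric
    that raises yields np.nan (serialized as null) instead of aborting."""
    try:
        return future.result()
    except Exception:
        if raise_exceptions:
            raise
        return np.nan
```

So a `null` faithfulness or context_precision usually means the metric raised internally for that row; rerunning with `raise_exceptions=True` surfaces the underlying error.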

hey @shahules786, made some improvements here, could you take a look?

jjmachan avatar Feb 05 '24 20:02 jjmachan