Problem with answer_relevance: always NaN for a local LLM
[ ] I checked the documentation and related resources and couldn't find an answer to my question.
Your Question
I want to compute Ragas scores with a custom LLM, but the answer relevancy metric always comes back as NaN. What's wrong with my code?
Code Examples
custom_llm = custom_llm('TheBloke/openchat_3.5-AWQ')
customHF_embeddings = CustomHuggingFaceRagasEmbeddings('TheBloke/openchat_3.5-AWQ')
response_relev = ResponseRelevancy(llm=custom_llm)
metrics = [response_relev]
runConfig = RunConfig(timeout=300, max_retries=28, max_wait=120, max_workers=2, log_tenacity=True)
all_llm_metrics = []
for elem_data in test_data:
    dataset = Dataset.from_pandas(pd.DataFrame(elem_data))
    id = elem_data.get('id')
    result = evaluate(
        dataset=dataset,
        metrics=metrics,
        batch_size=2,
        embeddings=customHF_embeddings,
        llm=custom_llm,
        run_config=runConfig,
        show_progress=True,
    )
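A quick sanity check before calling evaluate (a minimal sketch, assuming customHF_embeddings as defined below) shows whether the vectors are even finite before they reach the metric:

import numpy as np

vecs = np.asarray(customHF_embeddings.embed_documents(['test question one', 'test question two']))
print(vecs.dtype, vecs.shape)
# numpy reduces in the array's own dtype, so float16 vectors can
# overflow to inf here, just like inside the metric
print('norms:', np.linalg.norm(vecs, axis=1))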
import asyncio
from typing import List, Union

import numpy as np
import torch
from ragas.embeddings import BaseRagasEmbeddings
from transformers import AutoModel, AutoTokenizer

class CustomHuggingFaceRagasEmbeddings(BaseRagasEmbeddings):
    def __init__(self, model_name: str, custom_embeddings: list = None, device: str = None):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.custom_embeddings = custom_embeddings
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        # Determine the device (GPU or CPU)
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
    def embed_documents(self, texts: list) -> np.ndarray:
        if self.custom_embeddings is not None:
            return self.custom_embeddings
        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        # Take the hidden states of the last layer
        last_hidden_state = outputs.hidden_states[-1]
        # Use the first token's vector as the sentence embedding
        embeddings = last_hidden_state[:, 0].cpu().numpy()
        print(type(embeddings))
        return embeddings
    def embed_query(self, query: str) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings[0])
        inputs = self.tokenizer(query, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]
        embedding = last_hidden_state[:, 0].cpu().numpy()
        return embedding
    async def aembed_query(self, query: str) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings[0])
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            # run_in_executor only takes positional args, and torch.no_grad()
            # is thread-local, so tokenization and the forward pass are
            # wrapped in one function that runs entirely in the executor thread
            inputs = self.tokenizer(query, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        embedding = await loop.run_in_executor(None, _encode)
        print('async query: ' + str(embedding.ndim))
        return embedding
    async def aembed_documents(self, texts: List[str]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            # Same pattern as aembed_query: keep no_grad and the forward
            # pass together inside the executor thread
            inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        return await loop.run_in_executor(None, _encode)
    def embed_text(self, text: Union[str, List[str]]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        if isinstance(text, str):
            text = [text]
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs)
        # The model returns a ModelOutput, not a tensor, so index into
        # last_hidden_state before moving to numpy
        embeddings = outputs.last_hidden_state[:, 0].cpu().numpy()
        return embeddings
    async def aembed_text(self, text: Union[str, List[str]]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        if isinstance(text, str):
            text = [text]
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        return await loop.run_in_executor(None, _encode)
Additional context
So I printed debug info from _answer_relevance and got:
RuntimeWarning: overflow encountered in reduce
return sqrt(add.reduce(s, axis=axis, keepdims=keepdims))
[[-1.589 0.428 -1.758 ... -0.4258 -1.761 2.39 ] [-1.589 0.429 -1.758 ... -0.4275 -1.761 2.385 ] [-1.589 0.428 -1.758 ... -0.4258 -1.759 2.39 ]]
[[-1.587 ] [ 0.4282] [-1.755 ] ... [-0.4268] [-1.758 ] [ 2.387 ]]
linalg norm gen que [inf inf inf] linalg norm que [inf] [inf inf inf] [inf inf inf]
site-packages/ragas/metrics/_answer_relevance.py:119: RuntimeWarning: invalid value encountered in divide
np.dot(gen_question_vec, question_vec.T).reshape(
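The warnings point at the embeddings rather than the LLM: if the model is loaded in half precision (common for AWQ checkpoints), the first-token vectors come back as float16, np.linalg.norm overflows to inf when squaring them, and the following division produces the NaN that ResponseRelevancy reports. A minimal sketch of a possible fix, with a hypothetical helper name, is to cast to float32 on the torch side before handing the vectors to numpy:

import numpy as np
import torch
import torch.nn.functional as F

def cls_embedding_fp32(last_hidden_state: torch.Tensor) -> np.ndarray:
    # Hypothetical helper: cast the first-token vector to float32 before
    # leaving torch, so numpy's norm no longer overflows in half precision
    vec = last_hidden_state[:, 0].float()
    # Optionally pre-normalize so downstream norms are exactly 1.0
    vec = F.normalize(vec, dim=-1)
    return vec.cpu().numpy()

# e.g. in embed_documents:
#     return cls_embedding_fp32(outputs.hidden_states[-1])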