Problem with answer_relevance: always NaN for a local LLM
[ ] I checked the documentation and related resources and couldn't find an answer to my question.
Your Question
I want to compute Ragas scores with a custom LLM, but the answer relevancy metric always comes back as NaN. What's wrong with my code?
Code Examples
custom_llm = custom_llm('TheBloke/openchat_3.5-AWQ')
customHF_embeddings = CustomHuggingFaceRagasEmbeddings('TheBloke/openchat_3.5-AWQ')
response_relev = ResponseRelevancy(llm=custom_llm)
metrics = [response_relev]
runConfig = RunConfig(timeout=300, max_retries=28, max_wait=120, max_workers=2, log_tenacity=True)
all_llm_metrics = []
for elem_data in test_data:
    dataset = Dataset.from_pandas(pd.DataFrame(elem_data))
    id = elem_data.get('id')
    result = evaluate(
        dataset=dataset,
        metrics=metrics,
        batch_size=2,
        embeddings=customHF_embeddings,
        llm=custom_llm,
        run_config=runConfig,
        show_progress=True,
    )
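A quick sanity check before calling evaluate (a minimal sketch, assuming customHF_embeddings as defined below) shows whether the vectors are even finite before they reach the metric:

import numpy as np

vecs = np.asarray(customHF_embeddings.embed_documents(['test question one', 'test question two']))
print(vecs.dtype, vecs.shape)
# numpy reduces in the array's own dtype, so float16 vectors can
# overflow to inf here, just like inside the metric
print('norms:', np.linalg.norm(vecs, axis=1))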
import asyncio
from typing import List, Union

import numpy as np
import torch
from ragas.embeddings import BaseRagasEmbeddings
from transformers import AutoModel, AutoTokenizer

class CustomHuggingFaceRagasEmbeddings(BaseRagasEmbeddings):
    def __init__(self, model_name: str, custom_embeddings: list = None, device: str = None):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.custom_embeddings = custom_embeddings
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        # Determine the device (GPU or CPU)
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
    def embed_documents(self, texts: list) -> np.ndarray:
        if self.custom_embeddings is not None:
            return self.custom_embeddings
        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        # Take the hidden states of the last layer
        last_hidden_state = outputs.hidden_states[-1]
        # Use the first token's vector as the sentence embedding
        embeddings = last_hidden_state[:, 0].cpu().numpy()
        print(type(embeddings))
        return embeddings
    def embed_query(self, query: str) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings[0])
        inputs = self.tokenizer(query, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]
        embedding = last_hidden_state[:, 0].cpu().numpy()
        return embedding
    async def aembed_query(self, query: str) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings[0])
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            # run_in_executor only takes positional args, and torch.no_grad()
            # is thread-local, so tokenization and the forward pass are
            # wrapped in one function that runs entirely in the executor thread
            inputs = self.tokenizer(query, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        embedding = await loop.run_in_executor(None, _encode)
        print('async query: ' + str(embedding.ndim))
        return embedding
    async def aembed_documents(self, texts: List[str]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            # Same pattern as aembed_query: keep no_grad and the forward
            # pass together inside the executor thread
            inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        return await loop.run_in_executor(None, _encode)
    def embed_text(self, text: Union[str, List[str]]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        if isinstance(text, str):
            text = [text]
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs)
        # The model returns a ModelOutput, not a tensor, so index into
        # last_hidden_state before moving to numpy
        embeddings = outputs.last_hidden_state[:, 0].cpu().numpy()
        return embeddings
    async def aembed_text(self, text: Union[str, List[str]]) -> np.ndarray:
        if self.custom_embeddings is not None:
            return np.array(self.custom_embeddings)
        if isinstance(text, str):
            text = [text]
        loop = asyncio.get_running_loop()

        def _encode() -> np.ndarray:
            inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)
            inputs = {key: value.to(self.device) for key, value in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            return outputs.last_hidden_state[:, 0].cpu().numpy()

        return await loop.run_in_executor(None, _encode)
Additional context
So I printed debug info from _answer_relevance and got:
RuntimeWarning: overflow encountered in reduce
return sqrt(add.reduce(s, axis=axis, keepdims=keepdims))
[[-1.589 0.428 -1.758 ... -0.4258 -1.761 2.39 ] [-1.589 0.429 -1.758 ... -0.4275 -1.761 2.385 ] [-1.589 0.428 -1.758 ... -0.4258 -1.759 2.39 ]]
[[-1.587 ] [ 0.4282] [-1.755 ] ... [-0.4268] [-1.758 ] [ 2.387 ]]
linalg norm gen que [inf inf inf] linalg norm que [inf] [inf inf inf] [inf inf inf]
site-packages/ragas/metrics/_answer_relevance.py:119: RuntimeWarning: invalid value encountered in divide
np.dot(gen_question_vec, question_vec.T).reshape(
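The warnings point at the embeddings rather than the LLM: if the model is loaded in half precision (common for AWQ checkpoints), the first-token vectors come back as float16, np.linalg.norm overflows to inf when squaring them, and the following division produces the NaN that ResponseRelevancy reports. A minimal sketch of a possible fix, with a hypothetical helper name, is to cast to float32 on the torch side before handing the vectors to numpy:

import numpy as np
import torch
import torch.nn.functional as F

def cls_embedding_fp32(last_hidden_state: torch.Tensor) -> np.ndarray:
    # Hypothetical helper: cast the first-token vector to float32 before
    # leaving torch, so numpy's norm no longer overflows in half precision
    vec = last_hidden_state[:, 0].float()
    # Optionally pre-normalize so downstream norms are exactly 1.0
    vec = F.normalize(vec, dim=-1)
    return vec.cpu().numpy()

# e.g. in embed_documents:
#     return cls_embedding_fp32(outputs.hidden_states[-1])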