relevant_memories in mem0/proxy/main.py missing "results" key handling when using Ollama and chat completion
🐛 Describe the bug
When using mem0's chat completion feature with the following Ollama config:
config = {
    "llm": {
        "provider": "ollama",
        "config": {
            "model": "llama3.1",
            "temperature": 0.1,
            "max_tokens": 128000,
            "ollama_base_url": "http://localhost:11434",
        },
    },
    "graph_store": {
        "provider": "neo4j",
        "config": {
            "url": "neo4j://localhost:7687",
            "username": "neo4j",
            "password": "***",
        },
        "llm": {
            "provider": "ollama",
            "config": {
                "model": "llama3.1",
                "temperature": 0.0,
                "max_tokens": 128000,
                "ollama_base_url": "http://localhost:11434",
            },
        },
    },
    "vector_store": {
        "provider": "qdrant",
        "config": {
            "collection_name": "mem0-test",
            "host": "localhost",
            "port": 6333,
            "embedding_model_dims": 768,
            "on_disk": True,
        },
    },
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text",
            "ollama_base_url": "http://localhost:11434",
        },
    },
    "version": "v1.1",
}
The retrieved memories in relevant_memories that get passed to _format_query_with_memories are nested one layer deeper, under a key called "results" (see the illustrative payload after the function below). When I change the code on line 181 to iterate over relevant_memories['results'], everything works as expected.
- memories_text = "\n".join(memory["memory"] for memory in relevant_memories)
+ memories_text = "\n".join(memory["memory"] for memory in relevant_memories['results'])
Full function code with the change, for completeness:
def _format_query_with_memories(self, messages, relevant_memories):
    memories_text = "\n".join(memory["memory"] for memory in relevant_memories['results'])
    return f"- Relevant Memories/Facts: {memories_text}\n\n- User Question: {messages[-1]['content']}"
I'm not sure how this should be handled properly so it works with all other providers, as I'm fairly new to Mem0, but I thought I should at least file an issue since I couldn't find anything about this so far.
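A minimal sketch of a provider-agnostic variant (my own guess at a fix, not necessarily how the maintainers will solve it) that accepts both the plain-list shape and the v1.1 dict shape:

def _format_query_with_memories(self, messages, relevant_memories):
    # v1.1 nests the memories under "results"; other providers/versions
    # may still hand over a plain list, so accept both shapes.
    if isinstance(relevant_memories, dict):
        relevant_memories = relevant_memories.get("results", [])
    memories_text = "\n".join(memory["memory"] for memory in relevant_memories)
    return f"- Relevant Memories/Facts: {memories_text}\n\n- User Question: {messages[-1]['content']}"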
My code for reference:
import logging
from mem0.proxy.main import Mem0

handler = logging.FileHandler(filename="ai-assistant.log", mode='a')
logging.basicConfig(handlers=[handler],
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.INFO)
config = {
    "llm": {
        "provider": "ollama",
        "config": {
            "model": "llama3.1",
            "temperature": 0.1,
            "max_tokens": 128000,
            "ollama_base_url": "http://localhost:11434",
        },
    },
    "graph_store": {
        "provider": "neo4j",
        "config": {
            "url": "neo4j://localhost:7687",
            "username": "neo4j",
            "password": "***",
        },
        "llm": {
            "provider": "ollama",
            "config": {
                "model": "llama3.1",
                "temperature": 0.0,
                "max_tokens": 128000,
                "ollama_base_url": "http://localhost:11434",
            },
        },
    },
    "vector_store": {
        "provider": "qdrant",
        "config": {
            "collection_name": "mem0-test",
            "host": "localhost",
            "port": 6333,
            "embedding_model_dims": 768,
            "on_disk": True,
        },
    },
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text",
            "ollama_base_url": "http://localhost:11434",
        },
    },
    "version": "v1.1",
}
client = Mem0(config=config)
user_id = "aiquen"

# Ask the user for an input
message = input("Welcome to Mem0 AI-Assistant, how can I help you? > ")

while True:
    # Use the input to re-create the messages list each time
    messages = [
        {
            "role": "user",
            "content": message,
        }
    ]
    # Create a chat completion
    chat_completion = client.chat.completions.create(messages=messages, user_id=user_id, model="ollama/llama3.1")
    # Print the answer from the chat completion
    print(f"Assistant: {chat_completion.choices[0].message.content}")
    message = input("> ")
@Dev-Khant Can I pick this up?
Yes @parshvadaftari, feel free to work on this. Thanks!
@aiqueneldar I have raised the PR for this issue.
I also have a suggestion for your code: to print the AI's response in real time, you might want to use the following approach:
for chunk in chat_completion:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
Instead of:
print(f"Assistant: {chat_completion.choices[0].message.content}")
Thank you both, for the PR and for the advice! :)