Object of type SearchEngineType is not JSON serializable
Hi,
I'm trying to use the Bing search engine tool, and I get the `Object of type SearchEngineType is not JSON serializable` error when executing the `agent.create_agent(agent_config)` line.
Here is my code. I don't know what I'm missing that makes this error happen; any help appreciated:

```python
class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    async def execute_turn(self, content: str):
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                yield chunk


async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[
            SearchToolDefinition(
                engine=SearchEngineType.bing,
                api_key=os.getenv("BING_SEARCH_API_KEY"),
            )
        ],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
```
Since you are using agents, you need to use `AgentConfigToolSearchToolDefinition` in the `agent_config`, not `SearchToolDefinition`. Check out the `AgentConfigToolSearchToolDefinition` object in the code base and use that. For future reference, whenever you use built-in tools in an `agent_config`, use the `AgentConfigTool` definitions, as they are all already present in the code.
Thank you for the answer. I changed my code to:

```python
agent_config = AgentConfig(
    model="Llama3.2-3B-Instruct",
    instructions="You are a helpful assistant and you answer in french concisely and precisely.",
    sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
    enable_session_persistence=False,
    tools=[
        AgentConfigToolSearchToolDefinition(
            engine="brave",
            api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
            type="brave_search",
        )
    ],
    tool_choice=ToolChoice.auto,
)
```
But I still get the following error: `Erreur Create Agent : Object of type SearchEngineType is not JSON serializable`. Any idea what I'm missing?
Can you send your stack trace and the full code, start to end?
Sure!
There are no errors on the server side:
```
(base) penta@0o-Legion:~$ llama stack run Cortana --disable-ipv6
Using config /home/penta/.llama/builds/conda/Cortana-run.yaml
Resolved 12 providers
 inner-inference => remote::ollama
 models => routing_table
 inference => autorouted
 inner-safety => meta-reference
 shields => routing_table
 safety => autorouted
 inner-memory => meta-reference
 memory_banks => routing_table
 memory => autorouted
 agents => meta-reference
 telemetry => meta-reference
 inspect => builtin
Initializing Ollama, checking connectivity to server...
Serving API memory_banks
 GET /memory_banks/get
 GET /memory_banks/list
 POST /memory_banks/register
Serving API safety
 POST /safety/run_shield
Serving API shields
 GET /shields/get
 GET /shields/list
 POST /shields/register
Serving API memory
 POST /memory/insert
 POST /memory/query
Serving API telemetry
 GET /telemetry/get_trace
 POST /telemetry/log_event
Serving API agents
 POST /agents/create
 POST /agents/session/create
 POST /agents/turn/create
 POST /agents/delete
 POST /agents/session/delete
 POST /agents/session/get
 POST /agents/step/get
 POST /agents/turn/get
Serving API inspect
 GET /health
 GET /providers/list
 GET /routes/list
Serving API inference
 POST /inference/chat_completion
 POST /inference/completion
 POST /inference/embeddings
Serving API models
 GET /models/get
 GET /models/list
 POST /models/register

Listening on 0.0.0.0:5000
INFO:     Started server process [3199]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit)
```
And here is my code:

```python
import warnings
from pathlib import Path
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from termcolor import cprint, colored
import fire
import asyncio
import time
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import (
    AgentConfig,
    AgentConfigToolSearchToolDefinition,
    AgentConfigTool,
)
from llama_stack_client.lib.agents import *
from llama_stack.apis.agents.agents import *
from llama_stack.apis.agents.client import *
import os
from dotenv import load_dotenv

warnings.filterwarnings('ignore')

load_dotenv('fichier.env')

## Initialize tiktoken tokenizer
tokenizer_path = "tokenizer.model"
num_reserved_special_tokens = 256

mergeable_ranks = load_tiktoken_bpe(tokenizer_path)

special_tokens = [
    "<|begin_of_text|>",
    "<|end_of_text|>",
    "<|reserved_special_token_0|>",
    "<|reserved_special_token_1|>",
    "<|finetune_right_pad_id|>",
    "<|step_id|>",
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|eom_id|>",
    "<|eot_id|>",
    "<|python_tag|>",
]
reserved_tokens = [
    f"<|reserved_special_token_{2 + i}|>"
    for i in range(num_reserved_special_tokens - len(special_tokens))
]
special_tokens = special_tokens + reserved_tokens

tokenizer = tiktoken.Encoding(
    name=Path(tokenizer_path).name,
    pat_str=r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]|\s[\r\n]+|\s+(?!\S)|\s+",
    mergeable_ranks=mergeable_ranks,
    special_tokens={token: len(mergeable_ranks) + i for i, token in enumerate(special_tokens)},
)

# Define a list of colors
colors = ["red", "green", "yellow", "blue", "magenta", "cyan", "white"]


def colorize_tokens(text):
    # Tokenize the input
    encoded_tokens = tokenizer.encode(text, allowed_special="all")
    colorized_text = ""
    # Assign each token a color from the list
    for i, token in enumerate(encoded_tokens):
        color = colors[i % len(colors)]  # Cycle through the list of colors
        token_text = tokenizer.decode([token])
        colorized_text += colored(token_text, color) + " "
    return colorized_text


def count_tokens(text):
    return len(tokenizer.encode(text, allowed_special="all"))


class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    async def execute_turn(self, content: str):
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                yield chunk


web_search = AgentConfigToolSearchToolDefinition(
    engine="brave",
    api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    type="brave_search",
)


async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")

    total_tokens = 0
    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question
        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Variable to hold the collected response
        response_text = ""

        # Execute turn and await response
        try:
            response = agent.execute_turn(content=question)
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")

        async for log in EventLogger().log(response):
            if log is not None:
                log.print()
                response_text += str(log)

        # Time taken for inference
        end_time = time.time()
        time_taken = end_time - start_time
        cprint(f"Time taken for inference: {time_taken:.2f} seconds", "cyan")

        # Count response tokens
        response_tokens = count_tokens(response_text)
        cprint(f"Nombre de tokens dans la réponse: {response_tokens}", "cyan")

        # Total tokens check
        total_tokens_turn = question_tokens + response_tokens
        total_tokens += total_tokens_turn
        cprint(f"Nombre de Token de la session : {total_tokens}", "cyan")
        if total_tokens >= 4096:
            cprint("Attention: la limite de contexte de 4096 tokens est atteinte!", "red")


def main(host: str, port: int, stream: bool = True):
    asyncio.run(run_main(host, port, stream))


if __name__ == "__main__":
    fire.Fire(main)
```
Firstly, it's `self.client.agents.sessions.create` and `self.client.agents.turns.create`. You have written `session` and `turn` in their place, which could be causing an issue. Update that and let me know:
```python
class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        self.agent_id = None
        self.session_id = None

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.sessions.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    def execute_turn(self, content: str):
        response = self.client.agents.turns.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        return response
        # for chunk in response:
        #     if chunk.event.payload.event_type != "turn_complete":
        #         yield chunk
```
Still the same error with your modification: `Erreur Create Agent : Object of type SearchEngineType is not JSON serializable`.
I am unable to solve this because I can't see where `SearchEngineType` is being used in this code.
Here:

```python
web_search = AgentConfigToolSearchToolDefinition(
    engine="brave",
    api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    type="brave_search",
)


async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
```
You are using `AgentConfigToolSearchToolDefinition`, not `SearchEngineType`:
```python
class AgentConfigToolSearchToolDefinition(TypedDict, total=False):
    api_key: Required[str]

    engine: Required[Literal["bing", "brave"]]

    type: Required[Literal["brave_search"]]

    input_shields: List[str]

    output_shields: List[str]

    remote_execution: AgentConfigToolSearchToolDefinitionRemoteExecution
```

This is its definition. `SearchEngineType`, on the other hand, is an Enum.
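For context, the error message itself comes straight from Python's `json` module, which refuses to encode `Enum` members. A minimal sketch of the failure mode, using a simplified stand-in for the real `SearchEngineType` (the actual enum lives in the server-side `llama_stack` package, and its member names here are assumptions):

```python
import json
from enum import Enum


class SearchEngineType(Enum):
    # Simplified stand-in for the real server-side enum.
    bing = "bing"
    brave = "brave"


json.dumps({"engine": "brave"})                       # fine: plain string literal
json.dumps({"engine": SearchEngineType.brave.value})  # fine: the enum's underlying value
json.dumps({"engine": SearchEngineType.brave})
# TypeError: Object of type SearchEngineType is not JSON serializable
```

This is consistent with the TypedDict above expecting plain string literals: if an enum member ends up in the request body anywhere, the client's JSON encoding fails with exactly this message.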
I tried with `SearchEngineType` previously (check my first post) and I had the same error.
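One possible explanation for why the error persists even with plain strings (an editorial guess, not confirmed in this thread): the script's wildcard imports may rebind the client-side names to the server-side pydantic models, which use `SearchEngineType` internally. A sketch of the suspected shadowing, assuming `llama_stack.apis.agents.agents` re-exports names such as `AgentConfig`:

```python
# Client-side AgentConfig: a plain TypedDict that serializes cleanly.
from llama_stack_client.types.agent_create_params import AgentConfig

# These star imports come later in the script and may rebind AgentConfig
# (and SamplingParams, ToolChoice, ...) to server-side pydantic models,
# which represent the search engine as a SearchEngineType enum internally.
from llama_stack.apis.agents.agents import *  # noqa: F401,F403

print(AgentConfig)  # check which definition actually won the name
```

If the pydantic `AgentConfig` wins, it could coerce `engine="brave"` back into a `SearchEngineType` member during validation, which would explain the error surviving the switch to string literals.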
So with this code I don't get the error anymore, but I don't have web search working either:

```python
class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        self.agent_id = None
        self.session_id = None

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        # Generate a unique session ID using uuid4
        unique_session_name = str(uuid.uuid4())
        # Use the unique session name in your code
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name=unique_session_name,
        )
        self.session_id = session.session_id

    def execute_turn(self, content: str):
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        return response


sampling_params = SamplingParams(
    strategy="greedy",
    temperature=0.0,
    top_p=0.9,
    max_tokens=256,
)

websearch = [
    AgentConfigToolSearchToolDefinition(
        type="brave_search",
        engine="brave",
        api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    )
]
```
And inside the `run_main` function:
```python
async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=sampling_params,
        enable_session_persistence=False,
        tools=websearch,
        tool_choice="auto",
        tool_prompt_format="function_tag",
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")

    total_tokens = 0
    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question
        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Variable to hold the collected response
        response_text = ""

        # Execute turn and await response
        try:
            response = agent.execute_turn(content=question)
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")

        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                if hasattr(chunk.event.payload, 'text_delta_model_response'):
                    response_text += chunk.event.payload.text_delta_model_response
                    print(chunk.event.payload.text_delta_model_response, end='', flush=True)
```
Any help appreciated.
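A debugging sketch that may help narrow down whether the search tool ever fires: dump every streamed event instead of only the text deltas. Only `chunk.event.payload.event_type` is taken from the code above; printing the raw payload is an assumption about what is useful to inspect, not a documented API:

```python
# Hypothetical debugging loop: log every event the turn stream produces.
# The repr of each payload should reveal whether any tool/search step runs.
response = agent.execute_turn(content=question)
for chunk in response:
    payload = chunk.event.payload
    print(f"[{payload.event_type}] {payload}")
```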
This issue has been automatically marked as stale because it has not had activity within 60 days. It will be automatically closed if no further activity occurs within 30 days.
This issue has been automatically closed due to inactivity. Please feel free to reopen if you feel it is still relevant!