"Internal Server Error" if use AzureChatOpenAI through company authorization, but OpenAI directly OK
crewai 0.19.0, openai 1.13.3
Error message:
2024-03-05 16:42:03,488 - INFO - HTTP Request: POST https://XXXXX-apim-qas.XXXXX.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-12-01-preview "HTTP/1.1 500 Internal Server Error"
I create the 'llm' through our company's internal authorization (APIM gateway + MSAL token):
from langchain_openai import AzureChatOpenAI

model_kwargs = {
    'extra_headers': {
        'Ocp-Apim-Subscription-Key': apim_subcription_key,
        'Authorization': f"Bearer {MsIdentity().get_access_token()}"  # MSAL
    }
}

llm = AzureChatOpenAI(
    temperature=0,
    azure_endpoint=openai_api_base,
    openai_api_version=openai_api_version,
    openai_api_type="azure",
    deployment_name="gpt-4",
    openai_api_key='fake_openai_api_key',  # MSAL requires a dummy string
    model_kwargs=model_kwargs,
)
and this llm itself works fine:
llm.invoke("hi")
Output: AIMessage(content='Hello! How can I assist you today?')
But it fails with CrewAI:
dummy_agent = Agent(
    role="do nothing",
    goal="",
    backstory="",
    allow_delegation=False,
    llm=llm,
)
dummy_task = Task(
    description="do nothing",
    expected_output="",
    agent=dummy_agent,
    allow_delegation=False,
)
crew = Crew(
    agents=[dummy_agent],
    tasks=[dummy_task],
    process=Process.sequential,
    llm=llm,
)
crew.kickoff()
Full error traceback:
InternalServerError Traceback (most recent call last)
File <timed exec>:25
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\crew.py:198, in Crew.kickoff(self, inputs)
195 metrics = []
197 if self.process == Process.sequential:
--> 198 result = self._run_sequential_process()
199 elif self.process == Process.hierarchical:
200 result, manager_metrics = self._run_hierarchical_process()
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\crew.py:232, in Crew._run_sequential_process(self)
229 self._logger.log("debug", f"Working Agent: {role}")
230 self._logger.log("info", f"Starting Task: {task.description}")
--> 232 output = task.execute(context=task_output)
233 if not task.async_execution:
234 task_output = output
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\task.py:132, in Task.execute(self, agent, context, tools)
130 self.thread.start()
131 else:
--> 132 result = self._execute(
133 task=self,
134 agent=agent,
135 context=context,
136 tools=tools,
137 )
138 return result
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\task.py:141, in Task._execute(self, agent, task, context, tools)
140 def _execute(self, agent, task, context, tools):
--> 141 result = agent.execute_task(
142 task=task,
143 context=context,
144 tools=tools,
145 )
147 exported_output = self._export_output(result)
149 self.output = TaskOutput(
150 description=self.description,
151 exported_output=exported_output,
152 raw_output=result,
153 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\agent.py:171, in Agent.execute_task(self, task, context, tools)
168 self.agent_executor.tools_description = render_text_description(tools)
169 self.agent_executor.tools_names = self.__tools_names(tools)
--> 171 result = self.agent_executor.invoke(
172 {
173 "input": task_prompt,
174 "tool_names": self.agent_executor.tools_names,
175 "tools": self.agent_executor.tools_description,
176 }
177 )["output"]
179 if self.max_rpm:
180 self._rpm_controller.stop_rpm_counter()
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain\chains\base.py:163, in Chain.invoke(self, input, config, **kwargs)
161 except BaseException as e:
162 run_manager.on_chain_error(e)
--> 163 raise e
164 run_manager.on_chain_end(outputs)
166 if include_run_info:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain\chains\base.py:153, in Chain.invoke(self, input, config, **kwargs)
150 try:
151 self._validate_inputs(inputs)
152 outputs = (
--> 153 self._call(inputs, run_manager=run_manager)
154 if new_arg_supported
155 else self._call(inputs)
156 )
158 final_outputs: Dict[str, Any] = self.prep_outputs(
159 inputs, outputs, return_only_outputs
160 )
161 except BaseException as e:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\agents\executor.py:64, in CrewAgentExecutor._call(self, inputs, run_manager)
62 while self._should_continue(self.iterations, time_elapsed):
63 if not self.request_within_rpm_limit or self.request_within_rpm_limit():
---> 64 next_step_output = self._take_next_step(
65 name_to_tool_map,
66 color_mapping,
67 inputs,
68 intermediate_steps,
69 run_manager=run_manager,
70 )
72 if self.step_callback:
73 self.step_callback(next_step_output)
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain\agents\agent.py:1097, in AgentExecutor._take_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager)
1088 def _take_next_step(
1089 self,
1090 name_to_tool_map: Dict[str, BaseTool],
(...)
1094 run_manager: Optional[CallbackManagerForChainRun] = None,
1095 ) -> Union[AgentFinish, List[Tuple[AgentAction, str]]]:
1096 return self._consume_next_step(
-> 1097 [
1098 a
1099 for a in self._iter_next_step(
1100 name_to_tool_map,
1101 color_mapping,
1102 inputs,
1103 intermediate_steps,
1104 run_manager,
1105 )
1106 ]
1107 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain\agents\agent.py:1097, in <listcomp>(.0)
1088 def _take_next_step(
1089 self,
1090 name_to_tool_map: Dict[str, BaseTool],
(...)
1094 run_manager: Optional[CallbackManagerForChainRun] = None,
1095 ) -> Union[AgentFinish, List[Tuple[AgentAction, str]]]:
1096 return self._consume_next_step(
-> 1097 [
1098 a
1099 for a in self._iter_next_step(
1100 name_to_tool_map,
1101 color_mapping,
1102 inputs,
1103 intermediate_steps,
1104 run_manager,
1105 )
1106 ]
1107 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\crewai\agents\executor.py:118, in CrewAgentExecutor._iter_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager)
116 intermediate_steps = self._prepare_intermediate_steps(intermediate_steps)
117 # Call the LLM to see what to do.
--> 118 output = self.agent.plan(
119 intermediate_steps,
120 callbacks=run_manager.get_child() if run_manager else None,
121 **inputs,
122 )
124 except OutputParserException as e:
125 if isinstance(self.handle_parsing_errors, bool):
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain\agents\agent.py:387, in RunnableAgent.plan(self, intermediate_steps, callbacks, **kwargs)
381 # Use streaming to make sure that the underlying LLM is invoked in a streaming
382 # fashion to make it possible to get access to the individual LLM tokens
383 # when using stream_log with the Agent Executor.
384 # Because the response from the plan is not a generator, we need to
385 # accumulate the output into final output and return that.
386 final_output: Any = None
--> 387 for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
388 if final_output is None:
389 final_output = chunk
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:2446, in RunnableSequence.stream(self, input, config, **kwargs)
2440 def stream(
2441 self,
2442 input: Input,
2443 config: Optional[RunnableConfig] = None,
2444 **kwargs: Optional[Any],
2445 ) -> Iterator[Output]:
-> 2446 yield from self.transform(iter([input]), config, **kwargs)
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:2433, in RunnableSequence.transform(self, input, config, **kwargs)
2427 def transform(
2428 self,
2429 input: Iterator[Input],
2430 config: Optional[RunnableConfig] = None,
2431 **kwargs: Optional[Any],
2432 ) -> Iterator[Output]:
-> 2433 yield from self._transform_stream_with_config(
2434 input,
2435 self._transform,
2436 patch_config(config, run_name=(config or {}).get("run_name") or self.name),
2437 **kwargs,
2438 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:1513, in Runnable._transform_stream_with_config(self, input, transformer, config, run_type, **kwargs)
1511 try:
1512 while True:
-> 1513 chunk: Output = context.run(next, iterator) # type: ignore
1514 yield chunk
1515 if final_output_supported:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:2397, in RunnableSequence._transform(self, input, run_manager, config)
2388 for step in steps:
2389 final_pipeline = step.transform(
2390 final_pipeline,
2391 patch_config(
(...)
2394 ),
2395 )
-> 2397 for output in final_pipeline:
2398 yield output
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:1051, in Runnable.transform(self, input, config, **kwargs)
1048 final: Input
1049 got_first_val = False
-> 1051 for chunk in input:
1052 if not got_first_val:
1053 final = chunk
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:4173, in RunnableBindingBase.transform(self, input, config, **kwargs)
4167 def transform(
4168 self,
4169 input: Iterator[Input],
4170 config: Optional[RunnableConfig] = None,
4171 **kwargs: Any,
4172 ) -> Iterator[Output]:
-> 4173 yield from self.bound.transform(
4174 input,
4175 self._merge_configs(config),
4176 **{**self.kwargs, **kwargs},
4177 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\runnables\base.py:1061, in Runnable.transform(self, input, config, **kwargs)
1058 final = final + chunk # type: ignore[operator]
1060 if got_first_val:
-> 1061 yield from self.stream(final, config, **kwargs)
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\language_models\chat_models.py:250, in BaseChatModel.stream(self, input, config, stop, **kwargs)
243 except BaseException as e:
244 run_manager.on_llm_error(
245 e,
246 response=LLMResult(
247 generations=[[generation]] if generation else []
248 ),
249 )
--> 250 raise e
251 else:
252 run_manager.on_llm_end(LLMResult(generations=[[generation]]))
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_core\language_models\chat_models.py:234, in BaseChatModel.stream(self, input, config, stop, **kwargs)
232 generation: Optional[ChatGenerationChunk] = None
233 try:
--> 234 for chunk in self._stream(
235 messages, stop=stop, run_manager=run_manager, **kwargs
236 ):
237 yield chunk.message
238 if generation is None:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\langchain_openai\chat_models\base.py:419, in ChatOpenAI._stream(self, messages, stop, run_manager, **kwargs)
416 params = {**params, **kwargs, "stream": True}
418 default_chunk_class = AIMessageChunk
--> 419 for chunk in self.client.create(messages=message_dicts, **params):
420 if not isinstance(chunk, dict):
421 chunk = chunk.model_dump()
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_utils\_utils.py:275, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
273 msg = f"Missing required argument: {quote(missing[0])}"
274 raise TypeError(msg)
--> 275 return func(*args, **kwargs)
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\resources\chat\completions.py:663, in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
611 @required_args(["messages", "model"], ["messages", "model", "stream"])
612 def create(
613 self,
(...)
661 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
662 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 663 return self._post(
664 "/chat/completions",
665 body=maybe_transform(
666 {
667 "messages": messages,
668 "model": model,
669 "frequency_penalty": frequency_penalty,
670 "function_call": function_call,
671 "functions": functions,
672 "logit_bias": logit_bias,
673 "logprobs": logprobs,
674 "max_tokens": max_tokens,
675 "n": n,
676 "presence_penalty": presence_penalty,
677 "response_format": response_format,
678 "seed": seed,
679 "stop": stop,
680 "stream": stream,
681 "temperature": temperature,
682 "tool_choice": tool_choice,
683 "tools": tools,
684 "top_logprobs": top_logprobs,
685 "top_p": top_p,
686 "user": user,
687 },
688 completion_create_params.CompletionCreateParams,
689 ),
690 options=make_request_options(
691 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
692 ),
693 cast_to=ChatCompletion,
694 stream=stream or False,
695 stream_cls=Stream[ChatCompletionChunk],
696 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:1200, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1186 def post(
1187 self,
1188 path: str,
(...)
1195 stream_cls: type[_StreamT] | None = None,
1196 ) -> ResponseT | _StreamT:
1197 opts = FinalRequestOptions.construct(
1198 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1199 )
-> 1200 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:889, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
880 def request(
881 self,
882 cast_to: Type[ResponseT],
(...)
887 stream_cls: type[_StreamT] | None = None,
888 ) -> ResponseT | _StreamT:
--> 889 return self._request(
890 cast_to=cast_to,
891 options=options,
892 stream=stream,
893 stream_cls=stream_cls,
894 remaining_retries=remaining_retries,
895 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:965, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
963 if retries > 0 and self._should_retry(err.response):
964 err.response.close()
--> 965 return self._retry_request(
966 options,
967 cast_to,
968 retries,
969 err.response.headers,
970 stream=stream,
971 stream_cls=stream_cls,
972 )
974 # If the response is streamed then we need to explicitly read the response
975 # to completion before attempting to access the response text.
976 if not err.response.is_closed:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._retry_request(self, options, cast_to, remaining_retries, response_headers, stream, stream_cls)
1009 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1010 # different thread if necessary.
1011 time.sleep(timeout)
-> 1013 return self._request(
1014 options=options,
1015 cast_to=cast_to,
1016 remaining_retries=remaining,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:965, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
963 if retries > 0 and self._should_retry(err.response):
964 err.response.close()
--> 965 return self._retry_request(
966 options,
967 cast_to,
968 retries,
969 err.response.headers,
970 stream=stream,
971 stream_cls=stream_cls,
972 )
974 # If the response is streamed then we need to explicitly read the response
975 # to completion before attempting to access the response text.
976 if not err.response.is_closed:
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._retry_request(self, options, cast_to, remaining_retries, response_headers, stream, stream_cls)
1009 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1010 # different thread if necessary.
1011 time.sleep(timeout)
-> 1013 return self._request(
1014 options=options,
1015 cast_to=cast_to,
1016 remaining_retries=remaining,
1017 stream=stream,
1018 stream_cls=stream_cls,
1019 )
File c:\Users\xxx\Miniconda3\envs\IBTools\lib\site-packages\openai\_base_client.py:980, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
977 err.response.read()
979 log.debug("Re-raising status error")
--> 980 raise self._make_status_error_from_response(err.response) from None
982 return self._process_response(
983 cast_to=cast_to,
984 options=options,
(...)
987 stream_cls=stream_cls,
988 )
InternalServerError: Error code: 500 - {'statusCode': 500, 'message': 'Internal server error', 'activityId': 'd89b1c29-41d1-4678-ace2-123a193a219e'}
I have the same issue and found that LangChain does not support this yet. I resolved it by creating my own custom LLM derived from BaseChatModel; you can refer to the implementation of AzureChatOpenAI for guidance.
Found this: https://python.langchain.com/docs/modules/model_io/chat/custom_chat_model/
Consider this: https://python.langchain.com/docs/integrations/chat/groq/
This approach works: https://python.langchain.com/docs/modules/model_io/chat/custom_chat_model/
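In case it helps anyone following that route, here is a rough sketch of what such a custom chat model could look like. It is only a sketch, not a drop-in fix: ApimChatModel, the endpoint, the field names, and the header names are assumptions based on the APIM setup described above, and it deliberately makes a single non-streaming request so the gateway always sees one complete JSON response body.

import requests
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class ApimChatModel(BaseChatModel):
    """Hypothetical chat model that calls Azure OpenAI through an APIM gateway."""

    endpoint: str                  # e.g. ".../openai/deployments/gpt-4/chat/completions"
    api_version: str = "2023-12-01-preview"
    subscription_key: str = ""     # Ocp-Apim-Subscription-Key
    bearer_token: str = ""         # MSAL access token
    temperature: float = 0.0

    @property
    def _llm_type(self) -> str:
        return "apim-azure-openai"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Map LangChain message types onto Chat Completions roles.
        role_map = {"human": "user", "ai": "assistant", "system": "system"}
        payload = {
            "messages": [
                {"role": role_map.get(m.type, "user"), "content": m.content}
                for m in messages
            ],
            "temperature": self.temperature,
        }
        if stop:
            payload["stop"] = stop

        # One plain (non-streaming) POST, so the response is a single JSON body
        # that still contains "usage", which gateway policies typically inspect.
        resp = requests.post(
            self.endpoint,
            params={"api-version": self.api_version},
            headers={
                "Ocp-Apim-Subscription-Key": self.subscription_key,
                "Authorization": f"Bearer {self.bearer_token}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()

        message = AIMessage(content=data["choices"][0]["message"]["content"])
        return ChatResult(generations=[ChatGeneration(message=message)])

Because _stream is not implemented here, BaseChatModel.stream should fall back to a single invoke-style call, so even CrewAI's streaming code path ends up issuing one ordinary request whose JSON body still contains the usage block.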
@masonzhang Thank you very much!!
Did you look into exactly where it breaks?
For me, when I configure it this way:
llm = AzureChatOpenAI(
    default_headers={
        "Authorization": f"Bearer {token}",
        "Ocp-Apim-Subscription-Key": "key",
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
        "Ocp-Apim-Trace": "true",
    },
    azure_endpoint=APIM_BASE_URL,            # set your APIM base URL here
    model_name=COMPLETION_MODEL,             # model name for the request
    azure_deployment=COMPLETION_DEPLOYMENT,  # deployment name configured in Azure OpenAI
    max_tokens=SUMMARY_MAX_TOKENS,
    api_version="2024-05-01-preview",
    temperature=SUMMARY_TEMPERATURE,
)
This works for me.
When I invoke
llm.invoke("Hello")
it responds correctly with:
AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}, 'model_name': 'gpt-35', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, id='xxx', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})
When I use APIM with CrewAI, it gives me the error below:
{'statusCode': 500, 'message': 'Internal server error', 'activityId': 'ba2f4130-8135-xxx'}
I debugged the APIM trace and found that it fails at the policy that reads ["usage"]["total_tokens"]: for some reason, with CrewAI the response body the policy sees is not valid JSON and has no total_tokens field.
I am still confused, because the same setup works with plain LangChain even though in the LangChain output the counts live under usage_metadata. It seems CrewAI changes something internally to record total tokens as crew output (https://docs.crewai.com/core-concepts/Crews/#crew-output)?
Any help would be much appreciated!
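One detail in the traceback above may explain this: the agent executor goes through RunnableAgent.plan -> self.runnable.stream(...) -> ChatOpenAI._stream, i.e. CrewAI requests the completion with stream=True, and streamed chunks normally carry no "usage" object for an APIM policy to read, whereas llm.invoke("Hello") is a single non-streaming request. A quick way to test this theory without CrewAI (just a diagnostic sketch, reusing the same llm object as above):

# Non-streaming request: the JSON response includes "usage" with total_tokens.
print(llm.invoke("hi"))

# Streaming request (the code path CrewAI's agent executor uses): if this also
# returns a 500 from the gateway, the APIM policy on usage/total_tokens is the
# likely culprit rather than CrewAI itself.
for chunk in llm.stream("hi"):
    print(chunk.content, end="", flush=True)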