Enable use of an internal service maas.apps.prod.rhoai
We want to enable access to the example service below, which serves models via vLLM:
https://maas.apps.prod.rhoai.rh-aiservices-bu.com/ https://maas.apps.prod.rhoai.rh-aiservices-bu.com/examples
from langchain_community.llms import VLLMOpenAI

API_URL = "https://granite-8b-code-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443"
API_KEY = "***************************"

llm = VLLMOpenAI(
    openai_api_key=API_KEY,
    openai_api_base=API_URL + "/v1",
    model_name="granite-8b-code-instruct-128k",
    model_kwargs={"stop": ["."]},
)

print(llm.invoke("Rome is"))
This should already be supported via our usage of langchain_openai. One has to set the following variables in args (a minimal sketch follows this list):
model
openai_api_key # may be something like "EMPTY"
openai_api_base
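For example, a minimal sketch assuming the langchain_openai ChatOpenAI provider; the endpoint URL and key are placeholders, not the real service values:

from langchain_openai import ChatOpenAI

# Placeholder key and endpoint; some vLLM deployments accept any non-empty key.
llm = ChatOpenAI(
    model="granite-8b-code-instruct-128k",
    openai_api_key="EMPTY",
    openai_api_base="https://<your-maas-endpoint>:443/v1",
)
print(llm.invoke("Rome is").content)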
I tried the following:
provider-settings:

OpenAI: &active
  environment:
    # OPENAI_BASE_BASE: "https://****"
    # OPENAI_API_KEY: "***"
  provider: "ChatOpenAI"
  args:
    model: "mistral-7b-instruct"
    openai_api_key: "https://*****"
    openai_api_base: "***"
Connection Error
violation.id=javax-to-jakarta-import-00001>(priority=0(2), depth=0, retries=0)
Traceback (most recent call last):
File "httpx/_transports/default.py", line 67, in map_httpcore_exceptions
File "httpx/_transports/default.py", line 231, in handle_request
File "httpcore/_sync/connection_pool.py", line 207, in handle_request
httpcore.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "openai/_base_client.py", line 996, in _request
File "httpx/_client.py", line 915, in send
File "httpx/_client.py", line 943, in _send_handling_auth
File "httpx/_client.py", line 980, in _send_handling_redirects
File "httpx/_client.py", line 1016, in _send_single_request
File "httpx/_transports/default.py", line 230, in handle_request
File "contextlib.py", line 158, in __exit__
File "httpx/_transports/default.py", line 84, in map_httpcore_exceptions
httpx.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "httpx/_transports/default.py", line 67, in map_httpcore_exceptions
File "httpx/_transports/default.py", line 231, in handle_request
File "httpcore/_sync/connection_pool.py", line 207, in handle_request
httpcore.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "openai/_base_client.py", line 996, in _request
File "httpx/_client.py", line 915, in send
File "httpx/_client.py", line 943, in _send_handling_auth
File "httpx/_client.py", line 980, in _send_handling_redirects
File "httpx/_client.py", line 1016, in _send_single_request
File "httpx/_transports/default.py", line 230, in handle_request
File "contextlib.py", line 158, in __exit__
File "httpx/_transports/default.py", line 84, in map_httpcore_exceptions
httpx.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "httpx/_transports/default.py", line 67, in map_httpcore_exceptions
File "httpx/_transports/default.py", line 231, in handle_request
File "httpcore/_sync/connection_pool.py", line 207, in handle_request
httpcore.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "openai/_base_client.py", line 996, in _request
File "httpx/_client.py", line 915, in send
File "httpx/_client.py", line 943, in _send_handling_auth
File "httpx/_client.py", line 980, in _send_handling_redirects
File "httpx/_client.py", line 1016, in _send_single_request
File "httpx/_transports/default.py", line 230, in handle_request
File "contextlib.py", line 158, in __exit__
File "httpx/_transports/default.py", line 84, in map_httpcore_exceptions
httpx.UnsupportedProtocol: Request URL is missing an 'http://' or 'https://' protocol.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "kai/reactive_codeplanner/task_manager/task_manager.py", line 92, in execute_task
File "contextlib.py", line 81, in inner
File "kai/reactive_codeplanner/task_runner/analyzer_lsp/task_runner.py", line 67, in execute_task
File "kai/reactive_codeplanner/agent/analyzer_fix/agent.py", line 139, in execute
File "kai/llm_interfacing/model_provider.py", line 238, in invoke
File "langchain_core/language_models/chat_models.py", line 284, in invoke
File "langchain_core/language_models/chat_models.py", line 860, in generate_prompt
File "langchain_core/language_models/chat_models.py", line 690, in generate
File "langchain_core/language_models/chat_models.py", line 925, in _generate_with_cache
File "langchain_openai/chat_models/base.py", line 771, in _generate
File "langchain_core/language_models/chat_models.py", line 87, in generate_from_stream
File "langchain_openai/chat_models/base.py", line 722, in _stream
File "openai/_utils/_utils.py", line 279, in wrapper
File "openai/resources/chat/completions.py", line 850, in create
File "openai/_base_client.py", line 1283, in post
File "openai/_base_client.py", line 960, in request
File "openai/_base_client.py", line 1020, in _request
File "openai/_base_client.py", line 1098, in _retry_request
File "openai/_base_client.py", line 1020, in _request
File "openai/_base_client.py", line 1098, in _retry_request
File "openai/_base_client.py", line 1030, in _request
openai.APIConnectionError: Connection error.
DEBUG - 2025-02-17 19:53:23,744 - kai.kai.reactive_codeplanner.task_manager.task_manager - Thread-1 - [task_manager.py:97 - execute_task()] - Task execution result: TaskResult(encountered_errors=['Connection error.'], modified_files=[])
DEBUG - 2025-02-17 19:53:23,745 - kai.kai_rpc_application - Thread-1 - [server.py:558 - get_codeplan_agent_solution()] - Task AnalyzerRuleViolation result: TaskResult(encountered_errors=['Connection error.'], modified_files=[])
INFO - 2025-02-17 19:53:23,745 - kai.kai.reactive_codeplanner.task_manager.task_manager - Thread-1 - [task_manager.py:110 - supply_result()] - Supplying result: TaskResult(encountered_errors=['Connection error.'], modified_files=[])
ERROR - 2025-02-17 19:53:23,745 - kai.kai.reactive_codeplanner.task_manager.task_manager - Thread-1 - [task_manager.py:119 - supply_result()] - Encountered errors: ['Connection error.']
DEBUG - 2025-02-17 19:53:23,745 - kai.kai_rpc_application - Thread-1 - [server.py:562 - get_codeplan_agent_solution()] - Executed task AnalyzerRuleViolation
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:572 - get_codeplan_agent_solution()] - TaskResult(encountered_errors=['Connection error.'], modified_files=[])
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:588 - get_codeplan_agent_solution()] - QUEUE_STATE_SEED_TASKS: SUCCESSFUL_TASKS: START
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:591 - get_codeplan_agent_solution()] - QUEUE_STATE_SEED_TASKS: SUCCESSFUL_TASKS: END
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:592 - get_codeplan_agent_solution()] - QUEUE_STATE_SEED_TASKS: IGNORED_TASKS: START
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:597 - get_codeplan_agent_solution()] - QUEUE_STATE_SEED_TASKS: IGNORED_TASKS: END
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:617 - get_codeplan_agent_solution()] - QUEUE_STATE_END_OF_CODE_PLAN: SUCCESSFUL TASKS: START
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:620 - get_codeplan_agent_solution()] - QUEUE_STATE_END_OF_CODE_PLAN: SUCCESSFUL TASKS: END
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:621 - get_codeplan_agent_solution()] - QUEUE_STATE_END_OF_CODE_PLAN: IGNORED_TASKS: START
DEBUG - 2025-02-17 19:53:23,839 - kai.kai_rpc_application - Thread-1 - [server.py:624 - get_codeplan_agent_solution()] - QUEUE_STATE_END_OF_CODE_PLAN: IGNORED_TASKS: END
DEBUG - 2025-02-17 19:53:23,888 - kai.kai-rpc-logger - Thread-1 - [core.py:238 - run()] - Waiting for message
DEBUG - 2025-02-17 19:53:23,888 - kai.jsonrpc - Thread-1 - [streams.py:107 - recv()] - Waiting for message
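The root error in the traceback above ("Request URL is missing an 'http://' or 'https://' protocol") is what httpx raises whenever the base URL it receives has no scheme, which matches the openai_api_base value in the config that was tried. A minimal, hypothetical repro (the hostname is a placeholder):

import httpx

try:
    # Deliberately missing the "https://" prefix; placeholder host.
    httpx.get("granite-8b-code-instruct-maas.example.com/v1/models")
except httpx.UnsupportedProtocol as exc:
    print(exc)  # Request URL is missing an 'http://' or 'https://' protocol.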
I was able to get this to work with the ChatOpenAI provider:
parasols-maas: &active
  environment:
    "OPENAI_API_KEY": "<redacted>"
  provider: "ChatOpenAI"
  args:
    model: "meta-llama/Llama-3.1-8B-Instruct"
    base_url: "https://<redacted>:443/v1"
I think this might be easier than trying to implement our own handling of continuation.
@shawn-hurley Agreed, there's no desire for VLLMOpenAI itself if we are good with reusing ChatOpenAI.
The only driver suggesting we might need VLLMOpenAI was the examples at https://maas.apps.prod.rhoai.rh-aiservices-bu.com/examples
I attempted to test with 0.0.13 and saw the error below:
TRACE - 2025-02-28 09:32:28,695 - kai.jsonrpc - Thread-1 - [streams.py:102 - send()] - Sending request: {"jsonrpc":"2.0","error":{"code":-32603,"message":"Failed to create model provider: Error code: 400 - {'object': 'error', 'message': \"[{'type': 'extra_forbidden', 'loc': ('body', 'max_completion_tokens'), 'msg': 'Extra inputs are not permitted', 'input': 1}]\", 'type': 'BadRequestError', 'param': None, 'code': 400}"},"id":0}
DEBUG - 2025-02-28 09:32:28,695 - kai.kai-rpc-logger - Thread-1 - [core.py:245 - run()] - Waiting for message
DEBUG - 2025-02-28 09:32:28,695 - kai.jsonrpc - Thread-1 - [streams.py:109 - recv()] - Waiting for message
TRACE - 2025-02-28 09:32:28,696 - kai.jsonrpc - Thread-1 - [streams.py:112 - recv()] - Reading headers
TRACE - 2025-02-28 09:32:28,696 - kai.jsonrpc - Thread-1 - [streams.py:119 - recv()] - Reading header line
kai rpc server failed to initialize [err: Error: Failed to create model provider: Error code: 400 - {'object': 'error', 'message': "[{'type': 'extra_forbidden', 'loc': ('body', 'max_completion_tokens'), 'msg': 'Extra inputs are not permitted', 'input': 1}]", 'type': 'BadRequestError', 'param': None, 'code': 400}]
My config (provider-settings.yaml) is:
parasols-maas: &active
  environment:
    OPENAI_API_KEY: "redacted"
  provider: "ChatOpenAI"
  args:
    model: "granite-8b-code-instruct-128k"
    base_url: "https://granite-8b-code-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1"
Note that I see success when I use:
parasols-maas-llama: &active
  environment:
    OPENAI_API_KEY: "redacted"
  provider: "ChatOpenAI"
  args:
    model: "meta-llama/Llama-3.1-8B-Instruct"
    base_url: "https://llama-3-1-8b-instruct-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1"
kai rpc server is initialized!
My current guess is that the endpoints serving each model from this service behave differently.
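One way to confirm that guess would be a quick probe of each endpoint with the raw OpenAI client, checking whether max_completion_tokens is rejected where max_tokens is accepted. This is a hypothetical sketch, not part of Kai; the endpoint and key are placeholders:

from openai import OpenAI

client = OpenAI(
    base_url="https://<maas-endpoint>:443/v1",  # placeholder
    api_key="<redacted>",
)

for params in ({"max_completion_tokens": 1}, {"max_tokens": 1}):
    try:
        client.chat.completions.create(
            model="granite-8b-code-instruct-128k",
            messages=[{"role": "user", "content": "ping"}],
            **params,
        )
        print(params, "-> accepted")
    except Exception as exc:
        print(params, "-> rejected:", exc)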
I believe the fix for this is to revert https://github.com/konveyor/kai/pull/685. Thoughts, @shawn-hurley? Note that this will probably require re-opening https://github.com/konveyor/kai/issues/627.
Added a real fix in #703
Here is the entire provider-settings file:
# This is a sample settings file generated by the extension with a few common providers
# and models defined. To return the file back to default, delete it and restart vscode.
---
environment:
  ALWAYS_APPLIED_KEY: "envvar to be set regardless of which model is active"

# This is a collection of model configurations that may be used. The `&active`
# anchor marks the configuration referenced by the `active` node below. The extension
# will use the active node for configuring the kai-rpc-server.
models:
  OpenAI:
    environment: &active
      OPENAI_API_KEY: "key"
    provider: "ChatOpenAI"
    args:
      model: "gpt-4o"

  parasols-maas:
    environment:
      OPENAI_API_KEY: "token"
    provider: "ChatOpenAI"
    args:
      model: "meta-llama/Llama-3.1-8B-Instruct"
      base_url: "--"

  rhoai:
    environment:
      SSL_CERT_FILE: "/Users/sshaaf/ca-cert.pem"
      REQUESTS_CA_BUNDLE: "/Users/sshaaf/ca-cert.pem"
      OPENAI_API_KEY: "token from OpenShift AI"
    provider: "ChatOpenAI"
    args:
      model: "granite-7b-instruct"
      base_url: "vLLM serving - OpenShift AI (self hosted url)"

  # AmazonBedrock:
  #   environment:
  #     AWS_ACCESS_KEY_ID: ""
  #     AWS_SECRET_ACCESS_KEY: ""
  #     AWS_DEFAULT_REGION: ""
  #   provider: "ChatBedrock"
  #   args:
  #     model_id: "meta.llama3-70b-instruct-v1:0"

  # If running locally https://ollama.com/, get the model name via `ollama list`
  # ChatOllama:
  #   provider: "ChatOllama"
  #   args:
  #     model: "your-model-here"

  # JustAnExample:
  #   environment:
  #     ANY_KEY_1: "any environment variable needed for this model provider"
  #     ANY_KEY_2: "any environment variable needed for this model provider"
  #   provider: "provider-string"
  #   args:
  #     anyArgumentName1: "argument one"
  #     anyArgumentName2: "argument two"
  #     any-argument-name-3: "argument three"
  #   template: "template string" # optional
  #   llamaHeader: "header string" # optional
  #   llmRetries: 5 # optional number, defaults to 5
  #   llmRetryDelay: 10.0 # optional float, default is 10.0

# This is the node used for configuring the server. A simple anchor/reference
# pair is an easy way to select a configuration. To change configs, move the
# `&active` anchor to the desired block and restart the server.
active: *active
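As a side note, a quick way to check which block the &active anchor resolves to (a hypothetical helper, not part of the extension):

import yaml  # PyYAML resolves the &active anchor / *active alias on load

with open("provider-settings.yaml") as f:
    settings = yaml.safe_load(f)

# Prints whatever node currently carries the &active anchor.
print(settings["active"])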
This was completed a while ago.