feat: Jan API Server should support proxying remote providers.
Jan version
0.5.17
Describe the Bug
When the API server is enabled and the /models endpoint is queried, the list returns cloud models even when the model engines are disabled and after a full restart of Jan.
Steps to Reproduce
- Start Jan
- disable all remote engines
- restart Jan
- enable API server
- query the /models endpoint
Screenshots / Logs
{
"data": [
{
"created": 1750162040,
"engine": "anthropic",
"id": "claude-3-opus-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 4096,
"stream": true,
"temperature": 0.7
},
"model": "claude-3-opus-latest",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "anthropic",
"id": "claude-3-5-haiku-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 8192,
"stream": true,
"temperature": 0.7
},
"model": "claude-3-5-haiku-latest",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "anthropic",
"id": "claude-3-5-sonnet-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 8192,
"stream": true,
"temperature": 0.7
},
"model": "claude-3-5-sonnet-latest",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "anthropic",
"id": "claude-3-7-sonnet-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 8192,
"stream": true,
"temperature": 0.7
},
"model": "claude-3-7-sonnet-latest",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openai",
"id": "gpt-4.5-preview",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 16384,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gpt-4.5-preview",
"object": "model",
"owned_by": "",
"version": "1.2"
},
{
"created": 1750162040,
"engine": "openai",
"id": "gpt-4-turbo",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gpt-4-turbo",
"object": "model",
"owned_by": "",
"version": "1.2"
},
{
"created": 1750162040,
"engine": "openai",
"id": "gpt-3.5-turbo",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gpt-3.5-turbo",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "openai",
"id": "gpt-4o",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gpt-4o",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "openai",
"id": "gpt-4o-mini",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 16384,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gpt-4o-mini",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "openai",
"id": "o1",
"inference_params": {
"max_tokens": 100000
},
"model": "o1",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openai",
"id": "o1-preview",
"inference_params": {
"max_tokens": 32768,
"stream": true
},
"model": "o1-preview",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openai",
"id": "o1-mini",
"inference_params": {
"max_tokens": 65536,
"stream": true
},
"model": "o1-mini",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openai",
"id": "o3-mini",
"inference_params": {
"max_tokens": 100000,
"stream": true
},
"model": "o3-mini",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "cohere",
"id": "command-r-plus",
"inference_params": {
"max_temperature": 1,
"max_tokens": 4096,
"stream": true,
"temperature": 0.7
},
"model": "command-r-plus",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "cohere",
"id": "command-r",
"inference_params": {
"max_temperature": 1,
"max_tokens": 4096,
"stream": true,
"temperature": 0.7
},
"model": "command-r",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "cohere",
"id": "command-a-03-2025",
"inference_params": {
"max_temperature": 1,
"max_tokens": 4096,
"stream": true,
"temperature": 0.7
},
"model": "command-a-03-2025",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openrouter",
"id": "deepseek/deepseek-r1:free",
"inference_params": {
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "deepseek/deepseek-r1:free",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openrouter",
"id": "deepseek/deepseek-r1-distill-llama-70b:free",
"inference_params": {
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openrouter",
"id": "meta-llama/llama-3.1-405b-instruct:free",
"inference_params": {
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "meta-llama/llama-3.1-405b-instruct:free",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openrouter",
"id": "qwen/qwen-vl-plus:free",
"inference_params": {
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "qwen/qwen-vl-plus:free",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "openrouter",
"id": "qwen/qwen2.5-vl-72b-instruct:free",
"inference_params": {
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "qwen/qwen2.5-vl-72b-instruct:free",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "groq",
"id": "llama3-70b-8192",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 8192,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "llama3-70b-8192",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "groq",
"id": "llama3-8b-8192",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 8192,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "llama3-8b-8192",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "groq",
"id": "llama-3.1-8b-instant",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 8000,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "llama-3.1-8b-instant",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "groq",
"id": "gemma2-9b-it",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "gemma2-9b-it",
"object": "model",
"owned_by": "",
"version": "1.2"
},
{
"created": 1750162040,
"engine": "groq",
"id": "llama-3.3-70b-versatile",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "llama-3.3-70b-versatile",
"object": "model",
"owned_by": "",
"version": "3.3"
},
{
"created": 1750162040,
"engine": "mistral",
"id": "mistral-small-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 32000,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "mistral-small-latest",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "mistral",
"id": "mistral-large-latest",
"inference_params": {
"max_temperature": 1,
"max_tokens": 32000,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "mistral-large-latest",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "mistral",
"id": "open-mixtral-8x22b",
"inference_params": {
"max_temperature": 1,
"max_tokens": 32000,
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "open-mixtral-8x22b",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "martian",
"id": "router",
"inference_params": {
"frequency_penalty": 0,
"max_tokens": 4096,
"presence_penalty": 0,
"stop": [],
"stream": true,
"temperature": 0.7,
"top_p": 0.95
},
"model": "router",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "nvidia",
"id": "mistralai/mistral-7b-instruct-v0.2",
"inference_params": {
"frequency_penalty": 0,
"max_temperature": 1,
"max_tokens": 1024,
"presence_penalty": 0,
"seed": null,
"stop": null,
"stream": false,
"temperature": 0.3,
"top_p": 1
},
"model": "mistralai/mistral-7b-instruct-v0.2",
"object": "model",
"owned_by": "",
"version": "1.1"
},
{
"created": 1750162040,
"engine": "deepseek",
"id": "deepseek-chat",
"inference_params": {
"max_tokens": 8192,
"stream": true,
"temperature": 0.6
},
"model": "deepseek-chat",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "deepseek",
"id": "deepseek-reasoner",
"inference_params": {
"max_tokens": 8192,
"stream": true,
"temperature": 0.6
},
"model": "deepseek-reasoner",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "google_gemini",
"id": "gemini-1.5-flash",
"inference_params": {
"max_tokens": 8192,
"stream": true,
"temperature": 0.6
},
"model": "gemini-1.5-flash",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "google_gemini",
"id": "gemini-1.5-flash-8b",
"inference_params": {
"max_tokens": 8192,
"stream": true,
"temperature": 0.6
},
"model": "gemini-1.5-flash-8b",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "google_gemini",
"id": "gemini-1.5-pro",
"inference_params": {
"max_tokens": 8192,
"stream": true,
"temperature": 0.6
},
"model": "gemini-1.5-pro",
"object": "model",
"owned_by": "",
"version": "1.0"
},
{
"created": 1750162040,
"engine": "google_gemini",
"id": "gemini-2.5-pro-preview-05-06",
"inference_params": {
"max_tokens": 65536,
"stream": true,
"temperature": 0.6
},
"model": "gemini-2.5-pro-preview-05-06",
"object": "model",
"owned_by": "",
"version": "1.0"
},
...
What is your OS?
- [ ] MacOS
- [x] Windows
- [ ] Linux
Looking at the My Models area of the settings, the remote engines are still listed there even when the engines are disabled. So my guess is that simply having the engines installed results in their models appearing in My Models.
What do you think, @louis-menlo @urmauur?
This is now better AND worse in 0.6.0. None of the cloud provider/remote models are listed now, even the enabled ones.
I have Anthropic enabled, but here's the response from /v1/models:
{
"data": [
{
"ai_template": "<|Assistant|>",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\deepseek-r1-distill-qwen-14b\\14b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "deepseek-r1-distill-qwen-14b:14b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "deepseek-r1-distill-qwen-14b:14b",
"n_parallel": 1,
"n_probs": 0,
"name": "deepseek-r1-distill-qwen-14b:14b",
"ngl": 49,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "{system_message}<|User|>{prompt}<|Assistant|>",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 8988110908,
"status": "downloaded",
"stop": [
"<|end▁of▁sentence|>"
],
"stream": true,
"system_template": "",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|User|>",
"version": "1"
},
{
"ai_template": "<|Assistant|><|end▁of▁sentence|><|Assistant|>",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\deepseek-r1-distill-qwen-7b\\7b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "deepseek-r1-distill-qwen-7b:7b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "deepseek-r1-distill-qwen-7b:7b",
"n_parallel": 1,
"n_probs": 0,
"name": "deepseek-r1-distill-qwen-7b:7b",
"ngl": 29,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "<|begin▁of▁sentence|>{system_prompt}<|User|>{prompt}<|Assistant|><|end▁of▁sentence|><|Assistant|>",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 4683074270,
"status": "downloaded",
"stop": [
"<|end▁of▁sentence|>"
],
"stream": true,
"system_template": "<|begin▁of▁sentence|>",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|User|>",
"version": "1"
},
{
"ai_template": "<|im_end|>\n<|im_start|>assistant\n",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\gemma3\\1b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "gemma3:1b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "gemma3:1b",
"n_parallel": 1,
"n_probs": 0,
"name": "gemma3:1b",
"ngl": 27,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 806059053,
"status": "downloaded",
"stop": [
"<|im_end|>"
],
"stream": true,
"system_template": "<|im_start|>system\n",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|im_end|>\n<|im_start|>user\n",
"version": "1"
},
{
"ai_template": "<|im_end|>\n<|im_start|>assistant\n",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\qwen3\\0.6b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "qwen3:0.6b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "qwen3:0.6b",
"n_parallel": 1,
"n_probs": 0,
"name": "qwen3:0.6b",
"ngl": 29,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 484220888,
"status": "downloaded",
"stop": [
"<|im_end|>"
],
"stream": true,
"system_template": "<|im_start|>system\n",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|im_end|>\n<|im_start|>user\n",
"version": "1"
},
{
"ai_template": "<|im_end|>\n<|im_start|>assistant\n",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\qwen3\\14b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "qwen3:14b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "qwen3:14b",
"n_parallel": 1,
"n_probs": 0,
"name": "qwen3:14b",
"ngl": 41,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 9001754197,
"status": "downloaded",
"stop": [
"<|im_end|>"
],
"stream": true,
"system_template": "<|im_start|>system\n",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|im_end|>\n<|im_start|>user\n",
"version": "1"
},
{
"ai_template": "<|im_end|>\n<|im_start|>assistant\n",
"created": 15,
"ctx_len": 4096,
"dynatemp_exponent": 1.0,
"dynatemp_range": 0.0,
"engine": "llama-cpp",
"files": [
"models\\cortex.so\\qwen3\\8b\\model.gguf"
],
"frequency_penalty": 0.0,
"gpu_arch": "",
"id": "qwen3:8b",
"ignore_eos": false,
"max_tokens": 4096,
"min_keep": 0,
"min_p": 0.05,
"mirostat": false,
"mirostat_eta": 0.1,
"mirostat_tau": 5.0,
"model": "qwen3:8b",
"n_parallel": 1,
"n_probs": 0,
"name": "qwen3:8b",
"ngl": 37,
"object": "",
"os": "",
"owned_by": "",
"penalize_nl": false,
"precision": "",
"presence_penalty": 0.0,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
"quantization_method": "",
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"seed": -1,
"size": 5027784713,
"status": "downloaded",
"stop": [
"<|im_end|>"
],
"stream": true,
"system_template": "<|im_start|>system\n",
"temperature": 0.7,
"text_model": false,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.9,
"typ_p": 1.0,
"user_template": "<|im_end|>\n<|im_start|>user\n",
"version": "1"
}
],
"object": "list",
"result": "OK"
}
Notice that no claude models are listed.
Thanks @thoraxe, we will improve this in the upcoming versions.
@LazyYuuki I don't think this issue is categorized correctly:
- maybe not an epic?
- subissues are unrelated
Recommended: untangle it from the subissues and demote it to just a feat.
It hasn't even begun to be implemented. It shouldn't be labeled as "in-review."