jan icon indicating copy to clipboard operation
jan copied to clipboard

feat: Jan API Server should support proxying remote providers.

Open thoraxe opened this issue 6 months ago • 6 comments

Jan version

0.5.17

Describe the Bug

When the API server is enabled and the /models endpoint is queried, the list returns cloud models even when the model engines are disabled and after a full restart of Jan.

Steps to Reproduce

  1. Start Jan
  2. Disable all remote engines
  3. Restart Jan
  4. Enable the API server
  5. Query the /models endpoint

Screenshots / Logs

{
  "data": [
    {
      "created": 1750162040,
      "engine": "anthropic",
      "id": "claude-3-opus-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 4096,
        "stream": true,
        "temperature": 0.7
      },
      "model": "claude-3-opus-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "anthropic",
      "id": "claude-3-5-haiku-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.7
      },
      "model": "claude-3-5-haiku-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "anthropic",
      "id": "claude-3-5-sonnet-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.7
      },
      "model": "claude-3-5-sonnet-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "anthropic",
      "id": "claude-3-7-sonnet-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.7
      },
      "model": "claude-3-7-sonnet-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "gpt-4.5-preview",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 16384,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gpt-4.5-preview",
      "object": "model",
      "owned_by": "",
      "version": "1.2"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "gpt-4-turbo",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gpt-4-turbo",
      "object": "model",
      "owned_by": "",
      "version": "1.2"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "gpt-3.5-turbo",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gpt-3.5-turbo",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "gpt-4o",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gpt-4o",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "gpt-4o-mini",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 16384,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gpt-4o-mini",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "o1",
      "inference_params": {
        "max_tokens": 100000
      },
      "model": "o1",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "o1-preview",
      "inference_params": {
        "max_tokens": 32768,
        "stream": true
      },
      "model": "o1-preview",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "o1-mini",
      "inference_params": {
        "max_tokens": 65536,
        "stream": true
      },
      "model": "o1-mini",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openai",
      "id": "o3-mini",
      "inference_params": {
        "max_tokens": 100000,
        "stream": true
      },
      "model": "o3-mini",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "cohere",
      "id": "command-r-plus",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 4096,
        "stream": true,
        "temperature": 0.7
      },
      "model": "command-r-plus",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "cohere",
      "id": "command-r",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 4096,
        "stream": true,
        "temperature": 0.7
      },
      "model": "command-r",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "cohere",
      "id": "command-a-03-2025",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 4096,
        "stream": true,
        "temperature": 0.7
      },
      "model": "command-a-03-2025",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openrouter",
      "id": "deepseek/deepseek-r1:free",
      "inference_params": {
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "deepseek/deepseek-r1:free",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openrouter",
      "id": "deepseek/deepseek-r1-distill-llama-70b:free",
      "inference_params": {
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "deepseek/deepseek-r1-distill-llama-70b:free",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openrouter",
      "id": "meta-llama/llama-3.1-405b-instruct:free",
      "inference_params": {
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "meta-llama/llama-3.1-405b-instruct:free",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openrouter",
      "id": "qwen/qwen-vl-plus:free",
      "inference_params": {
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "qwen/qwen-vl-plus:free",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "openrouter",
      "id": "qwen/qwen2.5-vl-72b-instruct:free",
      "inference_params": {
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "qwen/qwen2.5-vl-72b-instruct:free",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "groq",
      "id": "llama3-70b-8192",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 8192,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "llama3-70b-8192",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "groq",
      "id": "llama3-8b-8192",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 8192,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "llama3-8b-8192",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "groq",
      "id": "llama-3.1-8b-instant",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 8000,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "llama-3.1-8b-instant",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "groq",
      "id": "gemma2-9b-it",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "gemma2-9b-it",
      "object": "model",
      "owned_by": "",
      "version": "1.2"
    },
    {
      "created": 1750162040,
      "engine": "groq",
      "id": "llama-3.3-70b-versatile",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "llama-3.3-70b-versatile",
      "object": "model",
      "owned_by": "",
      "version": "3.3"
    },
    {
      "created": 1750162040,
      "engine": "mistral",
      "id": "mistral-small-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 32000,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "mistral-small-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "mistral",
      "id": "mistral-large-latest",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 32000,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "mistral-large-latest",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "mistral",
      "id": "open-mixtral-8x22b",
      "inference_params": {
        "max_temperature": 1,
        "max_tokens": 32000,
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "open-mixtral-8x22b",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "martian",
      "id": "router",
      "inference_params": {
        "frequency_penalty": 0,
        "max_tokens": 4096,
        "presence_penalty": 0,
        "stop": [],
        "stream": true,
        "temperature": 0.7,
        "top_p": 0.95
      },
      "model": "router",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "nvidia",
      "id": "mistralai/mistral-7b-instruct-v0.2",
      "inference_params": {
        "frequency_penalty": 0,
        "max_temperature": 1,
        "max_tokens": 1024,
        "presence_penalty": 0,
        "seed": null,
        "stop": null,
        "stream": false,
        "temperature": 0.3,
        "top_p": 1
      },
      "model": "mistralai/mistral-7b-instruct-v0.2",
      "object": "model",
      "owned_by": "",
      "version": "1.1"
    },
    {
      "created": 1750162040,
      "engine": "deepseek",
      "id": "deepseek-chat",
      "inference_params": {
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.6
      },
      "model": "deepseek-chat",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "deepseek",
      "id": "deepseek-reasoner",
      "inference_params": {
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.6
      },
      "model": "deepseek-reasoner",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "google_gemini",
      "id": "gemini-1.5-flash",
      "inference_params": {
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.6
      },
      "model": "gemini-1.5-flash",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "google_gemini",
      "id": "gemini-1.5-flash-8b",
      "inference_params": {
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.6
      },
      "model": "gemini-1.5-flash-8b",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "google_gemini",
      "id": "gemini-1.5-pro",
      "inference_params": {
        "max_tokens": 8192,
        "stream": true,
        "temperature": 0.6
      },
      "model": "gemini-1.5-pro",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
    {
      "created": 1750162040,
      "engine": "google_gemini",
      "id": "gemini-2.5-pro-preview-05-06",
      "inference_params": {
        "max_tokens": 65536,
        "stream": true,
        "temperature": 0.6
      },
      "model": "gemini-2.5-pro-preview-05-06",
      "object": "model",
      "owned_by": "",
      "version": "1.0"
    },
...

What is your OS?

  • [ ] MacOS
  • [x] Windows
  • [ ] Linux

thoraxe avatar Jun 17 '25 13:06 thoraxe

Looking at the My Models area of the settings, the remote engines are still listed there even when the engines are disabled. So my guess is that merely having the engines installed results in their models appearing in My Models.

thoraxe avatar Jun 17 '25 13:06 thoraxe

What do you think, @louis-menlo @urmauur?

david-menloai avatar Jun 17 '25 13:06 david-menloai

This is now better AND worse in 0.6.0. None of the cloud provider/remote models are listed now, even the enabled ones.

I have Anthropic enabled, but here's the response from /v1/models:

{
  "data": [
    {
      "ai_template": "<|Assistant|>",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\deepseek-r1-distill-qwen-14b\\14b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "deepseek-r1-distill-qwen-14b:14b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "deepseek-r1-distill-qwen-14b:14b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "deepseek-r1-distill-qwen-14b:14b",
      "ngl": 49,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "{system_message}<|User|>{prompt}<|Assistant|>",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 8988110908,
      "status": "downloaded",
      "stop": [
        "<|end▁of▁sentence|>"
      ],
      "stream": true,
      "system_template": "",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|User|>",
      "version": "1"
    },
    {
      "ai_template": "<|Assistant|><|end▁of▁sentence|><|Assistant|>",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\deepseek-r1-distill-qwen-7b\\7b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "deepseek-r1-distill-qwen-7b:7b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "deepseek-r1-distill-qwen-7b:7b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "deepseek-r1-distill-qwen-7b:7b",
      "ngl": 29,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "<|begin▁of▁sentence|>{system_prompt}<|User|>{prompt}<|Assistant|><|end▁of▁sentence|><|Assistant|>",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 4683074270,
      "status": "downloaded",
      "stop": [
        "<|end▁of▁sentence|>"
      ],
      "stream": true,
      "system_template": "<|begin▁of▁sentence|>",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|User|>",
      "version": "1"
    },
    {
      "ai_template": "<|im_end|>\n<|im_start|>assistant\n",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\gemma3\\1b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "gemma3:1b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "gemma3:1b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "gemma3:1b",
      "ngl": 27,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 806059053,
      "status": "downloaded",
      "stop": [
        "<|im_end|>"
      ],
      "stream": true,
      "system_template": "<|im_start|>system\n",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|im_end|>\n<|im_start|>user\n",
      "version": "1"
    },
    {
      "ai_template": "<|im_end|>\n<|im_start|>assistant\n",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\qwen3\\0.6b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "qwen3:0.6b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "qwen3:0.6b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "qwen3:0.6b",
      "ngl": 29,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 484220888,
      "status": "downloaded",
      "stop": [
        "<|im_end|>"
      ],
      "stream": true,
      "system_template": "<|im_start|>system\n",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|im_end|>\n<|im_start|>user\n",
      "version": "1"
    },
    {
      "ai_template": "<|im_end|>\n<|im_start|>assistant\n",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\qwen3\\14b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "qwen3:14b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "qwen3:14b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "qwen3:14b",
      "ngl": 41,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 9001754197,
      "status": "downloaded",
      "stop": [
        "<|im_end|>"
      ],
      "stream": true,
      "system_template": "<|im_start|>system\n",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|im_end|>\n<|im_start|>user\n",
      "version": "1"
    },
    {
      "ai_template": "<|im_end|>\n<|im_start|>assistant\n",
      "created": 15,
      "ctx_len": 4096,
      "dynatemp_exponent": 1.0,
      "dynatemp_range": 0.0,
      "engine": "llama-cpp",
      "files": [
        "models\\cortex.so\\qwen3\\8b\\model.gguf"
      ],
      "frequency_penalty": 0.0,
      "gpu_arch": "",
      "id": "qwen3:8b",
      "ignore_eos": false,
      "max_tokens": 4096,
      "min_keep": 0,
      "min_p": 0.05,
      "mirostat": false,
      "mirostat_eta": 0.1,
      "mirostat_tau": 5.0,
      "model": "qwen3:8b",
      "n_parallel": 1,
      "n_probs": 0,
      "name": "qwen3:8b",
      "ngl": 37,
      "object": "",
      "os": "",
      "owned_by": "",
      "penalize_nl": false,
      "precision": "",
      "presence_penalty": 0.0,
      "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
      "quantization_method": "",
      "repeat_last_n": 64,
      "repeat_penalty": 1.0,
      "seed": -1,
      "size": 5027784713,
      "status": "downloaded",
      "stop": [
        "<|im_end|>"
      ],
      "stream": true,
      "system_template": "<|im_start|>system\n",
      "temperature": 0.7,
      "text_model": false,
      "tfs_z": 1.0,
      "top_k": 40,
      "top_p": 0.9,
      "typ_p": 1.0,
      "user_template": "<|im_end|>\n<|im_start|>user\n",
      "version": "1"
    }
  ],
  "object": "list",
  "result": "OK"
}

Notice that no Claude models are listed.

thoraxe avatar Jun 19 '25 13:06 thoraxe

Thanks @thoraxe, we will improve this in the upcoming versions.

louis-jan avatar Jun 19 '25 13:06 louis-jan

@LazyYuuki I don't think this issue is categorized correctly:

  • maybe not an epic?
  • subissues are unrelated

Recommended: untangle from subissues and demote to just a feat.

freelerobot avatar Jul 02 '25 04:07 freelerobot

It hasn't even begun to be implemented. It shouldn't be labeled as "in-review."

louis-jan avatar Jul 08 '25 02:07 louis-jan