PaddleOCR icon indicating copy to clipboard operation
PaddleOCR copied to clipboard

Value error, Model architectures ['PaddleOCRVLForConditionalGeneration'] are not supported for now.

Open hwang136 opened this issue 1 month ago • 1 comment

🔎 Search before asking

  • [x] I have searched the PaddleOCR Docs and found no similar bug report.
  • [x] I have searched the PaddleOCR Issues and found no similar bug report.
  • [x] I have searched the PaddleOCR Discussions and found no similar bug report.

🐛 Bug (问题描述)


ValidationError Traceback (most recent call last) Cell In[6], line 2 1 from vllm import LLM, SamplingParams ----> 2 llm = LLM(model='/data/pe-task/ai_storage_gcp_dsocr_test/20251118/PaddleOCR-VL-0.9B', trust_remote_code=True)

File /usr/local/lib/python3.10/dist-packages/vllm/entrypoints/llm.py:282, in LLM.__init__(self, model, runner, convert, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, allowed_local_media_path, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, hf_token, hf_overrides, mm_processor_kwargs, override_pooler_config, kv_cache_memory_bytes, compilation_config, logits_processors, **kwargs) 279 log_non_default_args(engine_args) 281 # Create the Engine (autoselects V0 vs V1) --> 282 self.llm_engine = LLMEngine.from_engine_args( 283 engine_args=engine_args, usage_context=UsageContext.LLM_CLASS) 284 self.engine_class = type(self.llm_engine) 286 self.request_counter = Counter()

File /usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py:486, in LLMEngine.from_engine_args(cls, engine_args, usage_context, stat_loggers) 484 """Creates an LLM engine from the engine arguments.""" 485 # Create the engine configs. --> 486 vllm_config = engine_args.create_engine_config(usage_context) 488 engine_cls = cls 489 if envs.VLLM_USE_V1:

File /usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py:1119, in EngineArgs.create_engine_config(self, usage_context, headless) 1115 current_platform.pre_register_and_update() 1117 device_config = DeviceConfig( 1118 device=cast(Device, current_platform.device_type)) -> 1119 model_config = self.create_model_config() 1121 # * If VLLM_USE_V1 is unset, we enable V1 for "supported features" 1122 # and fall back to V0 for experimental or unsupported features. 1123 # * If VLLM_USE_V1=1, we enable V1 for supported + experimental 1124 # features and raise error for unsupported features. 1125 # * If VLLM_USE_V1=0, we disable V1. 1126 use_v1 = False

File /usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py:963, in EngineArgs.create_model_config(self) 956 logger.warning( 957 "--enable-multimodal-encoder-data-parallel` is deprecated " 958 "and will be removed in v0.13. " 959 "Please use `--mm-encoder-tp-mode data` instead.") 961 self.mm_encoder_tp_mode = "data" --> 963 return ModelConfig( 964 model=self.model, 965 hf_config_path=self.hf_config_path, 966 runner=self.runner, 967 convert=self.convert, 968 task=self.task, 969 tokenizer=self.tokenizer, 970 tokenizer_mode=self.tokenizer_mode, 971 trust_remote_code=self.trust_remote_code, 972 allowed_local_media_path=self.allowed_local_media_path, 973 dtype=self.dtype, 974 seed=self.seed, 975 revision=self.revision, 976 code_revision=self.code_revision, 977 rope_scaling=self.rope_scaling, 978 rope_theta=self.rope_theta, 979 hf_token=self.hf_token, 980 hf_overrides=self.hf_overrides, 981 tokenizer_revision=self.tokenizer_revision, 982 max_model_len=self.max_model_len, 983 quantization=self.quantization, 984 enforce_eager=self.enforce_eager, 985 max_seq_len_to_capture=self.max_seq_len_to_capture, 986 max_logprobs=self.max_logprobs, 987 logprobs_mode=self.logprobs_mode, 988 disable_sliding_window=self.disable_sliding_window, 989 disable_cascade_attn=self.disable_cascade_attn, 990 skip_tokenizer_init=self.skip_tokenizer_init, 991 enable_prompt_embeds=self.enable_prompt_embeds, 992 served_model_name=self.served_model_name, 993 limit_mm_per_prompt=self.limit_mm_per_prompt, 994 interleave_mm_strings=self.interleave_mm_strings, 995 media_io_kwargs=self.media_io_kwargs, 996 skip_mm_profiling=self.skip_mm_profiling, 997 use_async_output_proc=not self.disable_async_output_proc, 998 config_format=self.config_format, 999 mm_processor_kwargs=self.mm_processor_kwargs, 1000 mm_processor_cache_gb=self.mm_processor_cache_gb, 1001 mm_encoder_tp_mode=self.mm_encoder_tp_mode, 1002 override_pooler_config=self.override_pooler_config, 1003 
logits_processor_pattern=self.logits_processor_pattern, 1004 generation_config=self.generation_config, 1005 override_generation_config=self.override_generation_config, 1006 enable_sleep_mode=self.enable_sleep_mode, 1007 model_impl=self.model_impl, 1008 override_attention_dtype=self.override_attention_dtype, 1009 logits_processors=self.logits_processors, 1010 io_processor_plugin=self.io_processor_plugin, 1011 )

File /usr/local/lib/python3.10/dist-packages/pydantic/_internal/_dataclasses.py:121, in complete_dataclass.<locals>.__init__(__dataclass_self__, *args, **kwargs) 119 __tracebackhide__ = True 120 s = __dataclass_self__ --> 121 s.__pydantic_validator__.validate_python(ArgsKwargs(args, kwargs), self_instance=s)

ValidationError: 1 validation error for ModelConfig Value error, Model architectures ['PaddleOCRVLForConditionalGeneration'] are not supported for now. Supported architectures: dict_keys(['ApertusForCausalLM', 'AquilaModel', 'AquilaForCausalLM', 'ArceeForCausalLM', 'ArcticForCausalLM', 'MiniMaxForCausalLM', 'MiniMaxText01ForCausalLM', 'MiniMaxM1ForCausalLM', 'BaiChuanForCausalLM', 'BaichuanForCausalLM', 'BailingMoeForCausalLM', 'BambaForCausalLM', 'BloomForCausalLM', 'ChatGLMModel', 'ChatGLMForConditionalGeneration', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'DbrxForCausalLM', 'DeciLMForCausalLM', 'DeepseekForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'Dots1ForCausalLM', 'Ernie4_5ForCausalLM', 'Ernie4_5_MoeForCausalLM', 'ExaoneForCausalLM', 'Exaone4ForCausalLM', 'FalconForCausalLM', 'Fairseq2LlamaForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForCausalLM', 'Gemma3nForCausalLM', 'Qwen3NextForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'Glm4MoeForCausalLM', 'GptOssForCausalLM', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTJForCausalLM', 'GPTNeoXForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'GraniteMoeHybridForCausalLM', 'GraniteMoeSharedForCausalLM', 'GritLM', 'Grok1ModelForCausalLM', 'HunYuanMoEV1ForCausalLM', 'HunYuanDenseV1ForCausalLM', 'HCXVisionForCausalLM', 'InternLMForCausalLM', 'InternLM2ForCausalLM', 'InternLM2VEForCausalLM', 'InternLM3ForCausalLM', 'JAISLMHeadModel', 'JambaForCausalLM', 'Lfm2ForCausalLM', 'LlamaForCausalLM', 'Llama4ForCausalLM', 'LLaMAForCausalLM', 'MambaForCausalLM', 'FalconMambaForCausalLM', 'FalconH1ForCausalLM', 'Mamba2ForCausalLM', 'MiniCPMForCausalLM', 'MiniCPM3ForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MotifForCausalLM', 'MptForCausalLM', 'MPTForCausalLM', 'MiMoForCausalLM', 'NemotronForCausalLM', 'NemotronHForCausalLM', 'OlmoForCausalLM', 'Olmo2ForCausalLM', 'OlmoeForCausalLM', 'OPTForCausalLM', 'OrionForCausalLM', 'PersimmonForCausalLM', 'PhiForCausalLM', 
'Phi3ForCausalLM', 'PhiMoEForCausalLM', 'Phi4FlashForCausalLM', 'Plamo2ForCausalLM', 'QWenLMHeadModel', 'Qwen2ForCausalLM', 'Qwen2MoeForCausalLM', 'Qwen3ForCausalLM', 'Qwen3MoeForCausalLM', 'RWForCausalLM', 'SeedOssForCausalLM', 'Step3TextForCausalLM', 'StableLMEpochForCausalLM', 'StableLmForCausalLM', 'Starcoder2ForCausalLM', 'SolarForCausalLM', 'TeleChat2ForCausalLM', 'TeleFLMForCausalLM', 'XverseForCausalLM', 'Zamba2ForCausalLM', 'BartModel', 'BartForConditionalGeneration', 'MBartForConditionalGeneration', 'BertModel', 'Gemma2Model', 'Gemma3TextModel', 'GPT2ForSequenceClassification', 'GteModel', 'GteNewModel', 'InternLM2ForRewardModel', 'JambaForSequenceClassification', 'LlamaModel', 'MistralModel', 'ModernBertModel', 'NomicBertModel', 'Qwen2Model', 'Qwen2ForRewardModel', 'Qwen2ForProcessRewardModel', 'RobertaForMaskedLM', 'RobertaModel', 'XLMRobertaModel', 'LlavaNextForConditionalGeneration', 'Phi3VForCausalLM', 'Qwen2VLForConditionalGeneration', 'PrithviGeoSpatialMAE', 'Terratorch', 'BertForSequenceClassification', 'GteNewForSequenceClassification', 'ModernBertForSequenceClassification', 'RobertaForSequenceClassification', 'XLMRobertaForSequenceClassification', 'JinaVLForRanking', 'AriaForConditionalGeneration', 'AyaVisionForConditionalGeneration', 'Blip2ForConditionalGeneration', 'ChameleonForConditionalGeneration', 'Cohere2VisionForConditionalGeneration', 'DeepseekVLV2ForCausalLM', 'Ernie4_5_VLMoeForConditionalGeneration', 'FuyuForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3nForConditionalGeneration', 'GLM4VForCausalLM', 'Glm4vForConditionalGeneration', 'Glm4vMoeForConditionalGeneration', 'GraniteSpeechForConditionalGeneration', 'H2OVLChatModel', 'InternVLChatModel', 'NemotronH_Nano_VL', 'InternS1ForConditionalGeneration', 'InternVLForConditionalGeneration', 'Idefics3ForConditionalGeneration', 'SmolVLMForConditionalGeneration', 'KeyeForConditionalGeneration', 'KeyeVL1_5ForConditionalGeneration', 'RForConditionalGeneration', 
'KimiVLForConditionalGeneration', 'Llama_Nemotron_Nano_VL', 'LlavaForConditionalGeneration', 'LlavaNextVideoForConditionalGeneration', 'LlavaOnevisionForConditionalGeneration', 'MantisForConditionalGeneration', 'MiDashengLMModel', 'MiniMaxVL01ForConditionalGeneration', 'MiniCPMO', 'MiniCPMV', 'Mistral3ForConditionalGeneration', 'MolmoForCausalLM', 'NVLM_D', 'Ovis', 'Ovis2_5', 'PaliGemmaForConditionalGeneration', 'Phi4MMForCausalLM', 'Phi4MultimodalForCausalLM', 'PixtralForConditionalGeneration', 'QwenVLForConditionalGeneration', 'Qwen2_5_VLForConditionalGeneration', 'Qwen2AudioForConditionalGeneration', 'Qwen2_5OmniModel', 'Qwen2_5OmniForConditionalGeneration', 'UltravoxModel', 'Step3VLForConditionalGeneration', 'TarsierForConditionalGeneration', 'Tarsier2ForConditionalGeneration', 'VoxtralForConditionalGeneration', 'DonutForConditionalGeneration', 'Florence2ForConditionalGeneration', 'MllamaForConditionalGeneration', 'Llama4ForConditionalGeneration', 'SkyworkR1VChatModel', 'WhisperForConditionalGeneration', 'MiMoMTPModel', 'EagleLlamaForCausalLM', 'EagleLlama4ForCausalLM', 'EagleMiniCPMForCausalLM', 'Eagle3LlamaForCausalLM', 'LlamaForCausalLMEagle3', 'EagleDeepSeekMTPModel', 'DeepSeekMTPModel', 'ErnieMTPModel', 'Glm4MoeMTPModel', 'MedusaModel', 'Qwen3NextMTP', 'SmolLM3ForCausalLM', 'Emu3ForConditionalGeneration', 'TransformersModel', 'TransformersForCausalLM', 'TransformersForMultimodalLM']) [type=value_error, input_value=ArgsKwargs((), {'model': ...rocessor_plugin': None}), input_type=ArgsKwargs] For further information visit https://errors.pydantic.dev/2.12/v/value_error

🏃‍♂️ Environment (运行环境)

vllm==0.11.1

🌰 Minimal Reproducible Example (最小可复现问题的Demo)

python code:

```python
from vllm import LLM, SamplingParams
llm = LLM(model='yourLocalPath/PaddleOCR-VL-0.9B', trust_remote_code=True)
```

command line:

```bash
vllm serve PaddlePaddle/PaddleOCR-VL \
    --trust-remote-code \
    --max-num-batched-tokens 16384 \
    --no-enable-prefix-caching \
    --mm-processor-cache-gb 0
```

hwang136 avatar Nov 24 '25 07:11 hwang136

未能复现您的情况,建议检查vllm版本是否确定是0.11.1

Image Image

zhang-prog avatar Nov 26 '25 10:11 zhang-prog