diff --git a/vllm/config/model.py b/vllm/config/model.py
index 7f6313005737b..84e59dfc83d2e 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -508,12 +508,7 @@ class ModelConfig:
         self.hf_image_processor_config = get_hf_image_processor_config(
             self.model, hf_token=self.hf_token, revision=self.revision
         )
-        self.model_arch_config = None
-        convertor_cls = MODEL_ARCH_CONFIG_CONVERTORS.get(
-            hf_config.model_type, ModelArchConfigConvertorBase
-        )
-        convertor = convertor_cls(hf_config)
-        self.model_arch_config = convertor.convert(self.model, self.revision)
+        self.model_arch_config = self.get_model_arch_config()
 
         architectures = self.architectures
         registry = self.registry
@@ -717,6 +712,13 @@ class ModelConfig:
         self._verify_cuda_graph()
         self._verify_bnb_config()
 
+    def get_model_arch_config(self) -> ModelArchitectureConfig:
+        convertor_cls = MODEL_ARCH_CONFIG_CONVERTORS.get(
+            self.hf_config.model_type, ModelArchConfigConvertorBase
+        )
+        convertor = convertor_cls(self.hf_config)
+        return convertor.convert(self.model, self.revision)
+
     @field_validator("tokenizer_mode", mode="after")
     def _lowercase_tokenizer_mode(cls, tokenizer_mode: str) -> str:
         return tokenizer_mode.lower()
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 614a3226cb711..f03d9d768c740 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -421,6 +421,7 @@ class VllmConfig:
 
         model_config = copy.deepcopy(self.model_config)
         model_config.hf_config = hf_config
+        model_config.model_arch_config = model_config.get_model_arch_config()
 
         return replace(self, model_config=model_config)
 