diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ec61fc4b9b06e..7763dbc392e4b 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1131,6 +1131,10 @@ class EngineArgs: device_config = DeviceConfig( device=cast(Device, current_platform.device_type)) + model_config = self.create_model_config() + self.model = model_config.model + self.tokenizer = model_config.tokenizer + (self.model, self.tokenizer, self.speculative_config) = maybe_override_with_speculators( model=self.model, @@ -1139,7 +1143,6 @@ class EngineArgs: trust_remote_code=self.trust_remote_code, vllm_speculative_config=self.speculative_config, ) - model_config = self.create_model_config() # * If VLLM_USE_V1 is unset, we enable V1 for "supported features" # and fall back to V0 for experimental or unsupported features.