diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index faeb9f81d961..f1c020ab5813 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -1138,7 +1138,9 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
 
         self.config = config
         self.quant_config = quant_config
-        self.model = Qwen3LLMModel(vllm_config=vllm_config, prefix=prefix)
+        self.model = Qwen3LLMModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
 
         if get_pp_group().is_last_rank:
             if config.tie_word_embeddings:
diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index f14b79f2886c..e5663c8a057a 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -117,9 +117,10 @@ class AutoWeightsLoader:
     environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
     """
 
-    # Models trained using early version ColossalAI
-    # may include these tensors in checkpoint. Skip them.
+    # Models trained using early version ColossalAI or quantized by
+    # GPTQModel may include these tensors in checkpoint. Skip them.
     ROTARY_EMBEDS_UNUSED_WEIGHTS = [
+        "rotary_pos_emb.inv_freq",
         "rotary_emb.inv_freq",
         "rotary_emb.cos_cached",
         "rotary_emb.sin_cached",
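
Aside (not part of the diff): a minimal sketch of what the two changes do. The `maybe_prefix` semantics below follow the helper defined in vllm/model_executor/models/utils.py; `should_skip` is a hypothetical stand-in for how a loader might consult the skip list, not AutoWeightsLoader's actual matching code.

```python
# Sketch of vLLM's maybe_prefix helper: join a parent prefix and a child
# module name, dropping the dot when the prefix is empty.
def maybe_prefix(prefix: str, name: str) -> str:
    return name if not prefix else f"{prefix}.{name}"


# Before the fix, the inner model reused the bare parent prefix, so its
# submodules were missing the "model." component in their qualified names.
assert maybe_prefix("", "model") == "model"
assert maybe_prefix("language_model", "model") == "language_model.model"

# The extra skip entry lets checkpoints that serialize the non-persistent
# "rotary_pos_emb.inv_freq" buffer load without unexpected-weight errors.
ROTARY_EMBEDS_UNUSED_WEIGHTS = [
    "rotary_pos_emb.inv_freq",
    "rotary_emb.inv_freq",
    "rotary_emb.cos_cached",
    "rotary_emb.sin_cached",
]


def should_skip(weight_name: str) -> bool:
    # Hypothetical illustration only: match any weight whose name ends with
    # one of the known unused rotary-embedding tensor names.
    return any(weight_name.endswith(w) for w in ROTARY_EMBEDS_UNUSED_WEIGHTS)


assert should_skip("model.layers.0.self_attn.rotary_pos_emb.inv_freq")
```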