[Bugfix] resolve Qwen3-VL GPTQModel quantized model loading failure (#28663)

Signed-off-by: GuanH <guansdrailib@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2025-12-10 23:35:52 +08:00 · 2025-11-15 02:44:27 +08:00 · 2025-11-15 02:44:27 +08:00 · cec275efce
commit cec275efce
parent e2741f6cbc
2 changed files with 6 additions and 3 deletions
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -1138,7 +1138,9 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
        self.config = config
        self.quant_config = quant_config
-        self.model = Qwen3LLMModel(vllm_config=vllm_config, prefix=prefix)
+        self.model = Qwen3LLMModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
        if get_pp_group().is_last_rank:
            if config.tie_word_embeddings:
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -117,9 +117,10 @@ class AutoWeightsLoader:
    environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
    """
-    # Models trained using early version ColossalAI
+    # Models trained using early version ColossalAI or quantized by
-    # may include these tensors in checkpoint. Skip them.
+    # GPTQModel may include these tensors in checkpoint. Skip them.
    ROTARY_EMBEDS_UNUSED_WEIGHTS = [
        "rotary_pos_emb.inv_freq",
        "rotary_emb.inv_freq",
        "rotary_emb.cos_cached",
        "rotary_emb.sin_cached",