[Bugfix] Resolve Qwen3-VL GPTQModel quantized model loading failure (#28663)

Signed-off-by: GuanH <guansdrailib@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Author: GuanH
Date: 2025-11-15 02:44:27 +08:00 (committed via GitHub)
Commit: cec275efce (parent: e2741f6cbc)
2 changed files with 6 additions and 3 deletions


@@ -1138,7 +1138,9 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
         self.config = config
         self.quant_config = quant_config
-        self.model = Qwen3LLMModel(vllm_config=vllm_config, prefix=prefix)
+        self.model = Qwen3LLMModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
         if get_pp_group().is_last_rank:
             if config.tie_word_embeddings:
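
Why this fixes the failure: vLLM resolves checkpoint keys against parameter names assembled from these module prefixes, and GPTQModel checkpoints store quantized tensors under keys such as "model.layers.0.self_attn.qkv_proj.qweight". Building the inner Qwen3LLMModel with the bare parent prefix dropped the "model." segment, so no quantized key matched. Below is a minimal sketch of the prefix-joining idea; it is a standalone reimplementation for illustration, assuming it mirrors the behavior of vLLM's maybe_prefix helper:

    # Sketch only: assumed to match the behavior of vLLM's maybe_prefix.
    def maybe_prefix(prefix: str, name: str) -> str:
        """Join a parent prefix and a child module name with a dot."""
        return f"{prefix}.{name}" if prefix else name

    # Without the explicit "model" segment, parameters registered as
    # "layers.0....qweight" while the GPTQModel checkpoint held
    # "model.layers.0....qweight", so weight matching found nothing.
    assert maybe_prefix("", "model") == "model"
    assert maybe_prefix("language_model", "model") == "language_model.model"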


@@ -117,9 +117,10 @@ class AutoWeightsLoader:
         environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
         """
-        # Models trained using early version ColossalAI
-        # may include these tensors in checkpoint. Skip them.
+        # Models trained using early version ColossalAI or quantized by
+        # GPTQModel may include these tensors in checkpoint. Skip them.
         ROTARY_EMBEDS_UNUSED_WEIGHTS = [
             "rotary_pos_emb.inv_freq",
             "rotary_emb.inv_freq",
             "rotary_emb.cos_cached",
             "rotary_emb.sin_cached",