[Model][Quantization] Override HF defaults to GGUF ones (incl. Qwen3 MoE) (#30118)

Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
2025-12-22 19:25:01 +08:00 · 2025-12-15 00:01:42 +09:00 · 2025-12-15 00:01:42 +09:00 · 9e33a1a75b
commit 9e33a1a75b
parent add4b0ca44
1 changed files with 22 additions and 0 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -617,6 +617,28 @@ def get_config(
        hf_overrides=hf_overrides_kw,
        **kwargs,
    )
    # Patching defaults for GGUF models
    if _is_gguf:
        # Some models have different default values between GGUF and HF.
        def apply_gguf_default(key: str, gguf_default: Any):
            """
            Set a GGUF-specific default for `key` when it was left unset.

            This closure works on the enclosing `config` and `config_dict`:
            `config_dict` is consulted to decide whether `key` was explicitly
            configured, and `config` is the object that receives the value.

            - `key` present in `config_dict`: nothing is changed.
            - `key` absent from `config_dict` (i.e. the HF default would be
              in effect): `config` is updated so `key` maps to `gguf_default`.
            """
            if key in config_dict:
                # Explicitly configured; leave the existing value alone.
                return
            config.update({key: gguf_default})
        # Apply architecture-specific GGUF defaults.
        if config.model_type in {"qwen3_moe"}:
            # Qwen3 MoE: norm_topk_prob is always true.
            # Note that this parameter is always false (HF default) on Qwen2 MoE.
            apply_gguf_default("norm_topk_prob", True)
    # Special architecture mapping check for GGUF models
    if _is_gguf:
        if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES: