mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 02:15:01 +08:00
[BUGFIX] GPTQ quantization compatibility for Qwen3 Next MOE models (AutoGPTQ and AutoRound-GPTQ) (#25268)
Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
parent
c308501cb6
commit
3642909617
@ -148,9 +148,11 @@ class Qwen3NextSparseMoeBlock(nn.Module):
|
||||
|
||||
def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
    """Return the quantization config to use, or ``None`` to ignore it.

    GPTQ configs do not have a list of ignored modules; however, AutoGPTQ
    seems to avoid gate quantization while AutoRound does quantize it.
    See: https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4

    Args:
        quant_config: The quantization config under consideration.

    Returns:
        ``None`` when ``quant_config`` is a ``GPTQConfig`` or
        ``GPTQMarlinConfig`` whose ``autoround_version`` is falsy;
        otherwise ``quant_config`` itself, unchanged.
    """
    is_gptq = isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig))
    # ``autoround_version`` is only read for GPTQ-family configs (the
    # ``and`` short-circuits), so other config types are never probed for it.
    if is_gptq and not quant_config.autoround_version:
        return None
    return quant_config
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user