From 0fc8fa751a4321d6531467537ff77cf3c1c70260 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Sun, 17 Aug 2025 15:56:07 -0700 Subject: [PATCH] fix: gptq marlin weight loading failure (#23066) --- vllm/model_executor/layers/quantization/gptq_marlin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index bd14ab9ef6c69..c5d1e017014f3 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -56,7 +56,7 @@ def get_moe_quant_method( # Dynamic per module/layer rules may override base config override_config(cloned_config, prefix=prefix) - return moe_method_cls(cloned_config) + return moe_method_cls(cloned_config, layer.moe_config) return None