mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 04:25:50 +08:00
[Bugfix] fix _get_quant_method of FusedMoE for deepseekV3.2 on non-NV… (#30057)
Signed-off-by: tjp_zju <tanjianpingzju1990@gmail.com>
This commit is contained in:
parent
0bb0bae436
commit
6ecc1e411b
@ -17,6 +17,9 @@ from vllm.model_executor.layers.fused_moe.layer import (
|
|||||||
FusedMoEMethodBase,
|
FusedMoEMethodBase,
|
||||||
FusedMoeWeightScaleSupported,
|
FusedMoeWeightScaleSupported,
|
||||||
)
|
)
|
||||||
|
from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
|
||||||
|
UnquantizedFusedMoEMethod,
|
||||||
|
)
|
||||||
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
|
||||||
from vllm.model_executor.layers.quantization import QuantizationMethods
|
from vllm.model_executor.layers.quantization import QuantizationMethods
|
||||||
from vllm.model_executor.layers.quantization.base_config import (
|
from vllm.model_executor.layers.quantization.base_config import (
|
||||||
@ -162,6 +165,8 @@ class MoeWNA16Config(QuantizationConfig):
|
|||||||
self, layer: torch.nn.Module, prefix: str
|
self, layer: torch.nn.Module, prefix: str
|
||||||
) -> Optional["QuantizeMethodBase"]:
|
) -> Optional["QuantizeMethodBase"]:
|
||||||
if is_layer_skipped_quant(prefix, self.modules_to_not_convert):
|
if is_layer_skipped_quant(prefix, self.modules_to_not_convert):
|
||||||
|
if isinstance(layer, FusedMoE):
|
||||||
|
return UnquantizedFusedMoEMethod(layer.moe_config)
|
||||||
return UnquantizedLinearMethod()
|
return UnquantizedLinearMethod()
|
||||||
elif isinstance(layer, LinearBase):
|
elif isinstance(layer, LinearBase):
|
||||||
# Avoid circular import
|
# Avoid circular import
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user