Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
vllmellm 2025-11-04 14:40:30 +00:00
parent abf597e542
commit aaa0d55587

View File

@@ -230,14 +230,12 @@ class RowWiseTorchScaledMMLinearKernel(TorchScaledMMLinearKernel):
class ChannelWiseTorchScaledMMLinearKernel(TorchScaledMMLinearKernel): class ChannelWiseTorchScaledMMLinearKernel(TorchScaledMMLinearKernel):
@classmethod @classmethod
def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]: def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
per_tensor_activation_scales = ( per_tensor_activation_scales = (
c.activation_quant_key.scale.group_shape.is_per_tensor() c.activation_quant_key.scale.group_shape.is_per_tensor()
) )
per_tensor_weight_scales = c.weight_quant_key.scale.group_shape.is_per_tensor() per_tensor_weight_scales = c.weight_quant_key.scale.group_shape.is_per_tensor()
if per_tensor_activation_scales and per_tensor_weight_scales: if per_tensor_activation_scales and per_tensor_weight_scales:
return ( return (
False, False,