diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index 16ede6113a947..fa34dba371e81 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -360,7 +360,7 @@ class W8A8BlockFp8LinearOp: weight, input_scale, weight_scale, - self.weight_group_shape, + list(self.weight_group_shape), input_2d.dtype, ) @@ -377,7 +377,7 @@ class W8A8BlockFp8LinearOp: weight, input_scale, weight_scale, - self.weight_group_shape, + list(self.weight_group_shape), input_2d.dtype, )