diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 65e0b7062153..d2616da84a00 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -757,10 +757,9 @@ class Fp8MoEMethod(FusedMoEMethodBase):
             layer.w2_weight = torch.nn.Parameter(shuffled_w2,
                                                  requires_grad=False)
 
-        # DeepGemm scales need to be transposed and aligned.  We try to do
+        # DeepGemm scales need to be transposed and aligned. We try to do
         # it ahead of time for performance reasons.
         if self.allow_deep_gemm and not is_deep_gemm_e8m0_used():
-            # Lazy import to avoid CUDA initialization problems.
             if _is_col_major(layer.w13_weight_scale_inv):
                 layer.w13_weight_scale_inv = \
                     get_col_major_tma_aligned_tensor(layer.w13_weight_scale_inv).contiguous()
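
Note on the hunk: the alignment done here ahead of time amounts to checking whether the per-block scale tensor is laid out column-major and, if so, re-materializing it contiguously in the layout DeepGEMM expects, so the work happens once after weight loading instead of on every forward pass. The snippet below is a minimal sketch of that idea; `_is_col_major_sketch` and the shapes are illustrative assumptions, not the actual vLLM helpers.

import torch

def _is_col_major_sketch(x: torch.Tensor) -> bool:
    # Rough stride check: treat the last two dims as a matrix and see
    # whether elements within a column are contiguous (row stride == 1).
    assert x.dim() >= 2
    return x.stride(-2) == 1 and x.stride(-1) >= x.size(-2)

# Hypothetical usage mirroring the hunk: detect a column-major scale
# tensor and make a contiguous copy once, up front.
scales = torch.randn(8, 64, 32).transpose(-1, -2)  # column-major view
if _is_col_major_sketch(scales):
    # Stand-in for get_col_major_tma_aligned_tensor(...).contiguous():
    # produce a contiguous copy in the layout the kernel expects.
    scales = scales.contiguous()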