diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu index a0852c5732ee6..7b57b32fdb089 100644 --- a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu +++ b/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu @@ -31,7 +31,8 @@ void cutlass_scaled_fp4_mm(torch::Tensor& D, torch::Tensor const& A, #if defined ENABLE_NVFP4 && ENABLE_NVFP4 return cutlass_scaled_fp4_mm_sm100a(D, A, B, A_sf, B_sf, alpha); #endif - TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled nvfp4 mm kernel, vLLM should " - "be compiled using CUDA 12.8 and target " - "compute capability 100 or above."); + TORCH_CHECK_NOT_IMPLEMENTED(false, + "No compiled nvfp4 mm kernel, vLLM should " + "be compiled using CUDA 12.8 and target " + "compute capability 100 or above."); } diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu index 26fd91217dbd0..9b30e4fef3567 100644 --- a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu +++ b/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu @@ -194,8 +194,9 @@ void runGemm(at::Tensor& D, at::Tensor const& A, at::Tensor const& B, at::Tensor const& A_sf, at::Tensor const& B_sf, at::Tensor const& alpha, int64_t m, int64_t n, int64_t k, cudaStream_t stream) { - TORCH_CHECK(false, "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to " - "a CUTLASS 3.8 source directory to enable support."); + TORCH_CHECK(false, + "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to " + "a CUTLASS 3.8 source directory to enable support."); } #endif // defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)