From 27a09dc52c8317b531b6d2b862198a8a0d2a88eb Mon Sep 17 00:00:00 2001
From: Kaixi Hou
Date: Thu, 20 Feb 2025 22:01:48 -0800
Subject: [PATCH] [NVIDIA] Fix an issue to use current stream for the nvfp4 quant (#13632)

---
 csrc/quantization/fp4/nvfp4_quant_kernels.cu | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/csrc/quantization/fp4/nvfp4_quant_kernels.cu b/csrc/quantization/fp4/nvfp4_quant_kernels.cu
index c3b8e9b3ec427..fef74111624f0 100644
--- a/csrc/quantization/fp4/nvfp4_quant_kernels.cu
+++ b/csrc/quantization/fp4/nvfp4_quant_kernels.cu
@@ -348,10 +348,7 @@ void scaled_fp4_quant_sm100a(torch::Tensor const& output,
   auto sf_out = static_cast(output_sf.data_ptr());
   auto output_ptr = static_cast(output.data_ptr());
   at::cuda::CUDAGuard device_guard{(char)input.get_device()};
-  auto stream = at::cuda::getStreamFromPool(false, input.get_device());
-  if (stream == nullptr) {
-    std::cerr << "Warning: Null CUDA stream" << std::endl;
-  }
+  auto stream = at::cuda::getCurrentCUDAStream(input.get_device());
 
   // We don't support e8m0 scales at this moment.
   bool useUE8M0 = false;