From d6da8a8ff22e555ce516ca8ce4d005b1bd1d9fe2 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Mon, 28 Apr 2025 19:23:18 -0700 Subject: [PATCH] [Bugfix] Fix `numel()` downcast in fused_layernorm_dynamic_per_token_quant.cu (#17316) --- .../fused_kernels/fused_layernorm_dynamic_per_token_quant.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu b/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu index 2b6ab7fcec90..95aa92e25b30 100644 --- a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu +++ b/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu @@ -96,7 +96,7 @@ void rms_norm_dynamic_per_token_quant_dispatch( std::optional const& scale_ub, std::optional& residual) { int32_t hidden_size = input.size(-1); - int32_t num_tokens = input.numel() / hidden_size; + auto num_tokens = input.numel() / hidden_size; dim3 grid(num_tokens); dim3 block(std::min(hidden_size, 1024));