Merge pull request #969 from youkaichao/rmsnorm

act_quant_kernel
2025-12-08 20:34:32 +08:00 · 2025-08-27 17:14:24 +08:00 · 2025-08-27 17:14:24 +08:00 · 9b4e9788e4
commit 9b4e9788e4
parent 82f6008c8c adecc0efbe
1 changed file with 2 additions and 1 deletion
--- a/inference/kernel.py
+++ b/inference/kernel.py
@@ -23,7 +23,8 @@ def act_quant_kernel(x_ptr, y_ptr, s_ptr, BLOCK_SIZE: tl.constexpr, scale_fmt: t
    pid = tl.program_id(axis=0)
    offs = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    x = tl.load(x_ptr + offs).to(tl.float32)
-    amax = tl.max(tl.abs(x), 1e-4)
+    amax = tl.max(tl.abs(x)) # reduction
+    amax = tl.maximum(amax, 1e-4) # clamp to 1e-4
    s = amax / 448.
    if scale_fmt == "ue8m0":
        exp = tl.math.ceil(tl.math.log2(s))