mirror of
https://git.datalinker.icu/deepseek-ai/DeepSeek-V3.git
synced 2025-12-09 04:44:28 +08:00
add clamp min of 1e-4
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
21b2dfe172
commit
484b42ca4e
@@ -23,7 +23,9 @@ def act_quant_kernel(x_ptr, y_ptr, s_ptr, BLOCK_SIZE: tl.constexpr, scale_fmt: t
|
||||
pid = tl.program_id(axis=0)
|
||||
offs = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
|
||||
x = tl.load(x_ptr + offs).to(tl.float32)
|
||||
s = tl.max(tl.abs(x)) / 448.
|
||||
amax = tl.max(tl.abs(x))
|
||||
# Clamp amax from below at 1e-4 so the scale s = amax / 448. never becomes 0
# (log2(0) in the "ue8m0" branch would be -inf). tl.maximum is the element-wise
# max; tl.min is a reduction whose second positional argument is the axis.
amax = tl.maximum(amax, 1e-4)
|
||||
s = amax / 448.
|
||||
if scale_fmt == "ue8m0":
|
||||
exp = tl.math.ceil(tl.math.log2(s))
|
||||
s = tl.math.exp2(exp)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user