mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 09:28:43 +08:00
[Bug] Fix B200 DeepGEMM E8M0 Accuracy Issue (#22399)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
46a13949d5
commit
eec890c1c1
@@ -799,7 +799,8 @@ def requant_weight_ue8m0_inplace(
|
||||
s_exp = s_exp[:m_cur, :k_cur]
|
||||
w_dq = w_q.to(torch.float32) * s_exp
|
||||
# Re-quantise using power-of-two scaling (UE8M0).
|
||||
- w_requant, s_requant = per_block_cast_to_fp8(w_dq, [block_m, block_k])
|
||||
+ w_requant, s_requant = per_block_cast_to_fp8(w_dq, [block_m, block_k],
|
||||
+ use_ue8m0=True)
|
||||
|
||||
# Write back the results in-place.
|
||||
w_q.copy_(w_requant)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user