mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 16:35:17 +08:00
[CI/Build][AMD] Fix ref_dynamic_per_token_quant reference implementation on ROCm. (#30291)
Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
parent
8f8fda261a
commit
302b2c1eb9
@@ -30,16 +30,11 @@ def ref_dynamic_per_token_quant(
|
||||
if quant_dtype == torch.int8
|
||||
else torch.finfo(quant_dtype)
|
||||
)
|
||||
qtype_traits_max = (
|
||||
ROCM_FP8FNUZ_MAX
|
||||
if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
|
||||
else qtype_traits.max
|
||||
)
|
||||
qtype_traits_min = (
|
||||
-ROCM_FP8FNUZ_MAX
|
||||
if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
|
||||
else qtype_traits.min
|
||||
use_fp8fnuz = (
|
||||
current_platform.is_fp8_fnuz() and quant_dtype == current_platform.fp8_dtype()
|
||||
)
|
||||
qtype_traits_max = ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.max
|
||||
qtype_traits_min = -ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.min
|
||||
qtype_max = as_float32_tensor(qtype_traits_max)
|
||||
s_1 = as_float32_tensor(1.0)
|
||||
s_512 = as_float32_tensor(512.0)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user