mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 09:47:05 +08:00
[CI/Build][AMD] Fix ref_dynamic_per_token_quant reference implementation on ROCm. (#30291)
Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
parent
8f8fda261a
commit
302b2c1eb9
@@ -30,16 +30,11 @@ def ref_dynamic_per_token_quant(
|
|||||||
if quant_dtype == torch.int8
|
if quant_dtype == torch.int8
|
||||||
else torch.finfo(quant_dtype)
|
else torch.finfo(quant_dtype)
|
||||||
)
|
)
|
||||||
qtype_traits_max = (
|
use_fp8fnuz = (
|
||||||
ROCM_FP8FNUZ_MAX
|
current_platform.is_fp8_fnuz() and quant_dtype == current_platform.fp8_dtype()
|
||||||
if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
|
|
||||||
else qtype_traits.max
|
|
||||||
)
|
|
||||||
qtype_traits_min = (
|
|
||||||
-ROCM_FP8FNUZ_MAX
|
|
||||||
if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
|
|
||||||
else qtype_traits.min
|
|
||||||
)
|
)
|
||||||
|
qtype_traits_max = ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.max
|
||||||
|
qtype_traits_min = -ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.min
|
||||||
qtype_max = as_float32_tensor(qtype_traits_max)
|
qtype_max = as_float32_tensor(qtype_traits_max)
|
||||||
s_1 = as_float32_tensor(1.0)
|
s_1 = as_float32_tensor(1.0)
|
||||||
s_512 = as_float32_tensor(512.0)
|
s_512 = as_float32_tensor(512.0)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user