[CI/Build][AMD] Fix ref_dynamic_per_token_quant reference implementation on ROCm. (#30291)

Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
2026-07-30 03:41:20 +08:00 · 2025-12-12 03:30:23 -06:00 · 2025-12-12 03:30:23 -06:00 · 302b2c1eb9
commit 302b2c1eb9
parent 8f8fda261a
1 changed files with 4 additions and 9 deletions
--- a/tests/kernels/quant_utils.py
+++ b/tests/kernels/quant_utils.py
@@ -30,16 +30,11 @@ def ref_dynamic_per_token_quant(
        if quant_dtype == torch.int8
        else torch.finfo(quant_dtype)
    )
-    qtype_traits_max = (
+    use_fp8fnuz = (
-        ROCM_FP8FNUZ_MAX
+        current_platform.is_fp8_fnuz() and quant_dtype == current_platform.fp8_dtype()
-        if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
-        else qtype_traits.max
-    )
-    qtype_traits_min = (
-        -ROCM_FP8FNUZ_MAX
-        if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
-        else qtype_traits.min
    )
+    qtype_traits_max = ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.max
+    qtype_traits_min = -ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.min
    qtype_max = as_float32_tensor(qtype_traits_max)
    s_1 = as_float32_tensor(1.0)
    s_512 = as_float32_tensor(512.0)