Fix Inductor TF32 setting error

Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
zhuhaoran 2025-12-13 18:09:36 +08:00
parent c5df2565ab
commit b4d755ac05

View File

@ -4,6 +4,7 @@ from itertools import repeat
from typing import Any
import pytest
import torch
import torch._dynamo.config as dynamo_config
from vllm import SamplingParams
@ -158,6 +159,7 @@ def run_tests(
m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
# lock matmul precision to full FP32 (IEEE)
m.setenv("VLLM_FLOAT32_MATMUL_PRECISION", "ieee")
torch.backends.cuda.matmul.allow_tf32 = False
# m.setenv("VLLM_BATCH_INVARIANT", "1")
outputs: list[tuple[str, list, list]] = []
for n, (