From b4d755ac050094cb903521ee035b034790136cb4 Mon Sep 17 00:00:00 2001
From: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
Date: Sat, 13 Dec 2025 18:09:36 +0800
Subject: [PATCH] Fix Inductor TF32 setting error in async scheduling e2e test

Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
---
 tests/v1/e2e/test_async_scheduling.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/v1/e2e/test_async_scheduling.py b/tests/v1/e2e/test_async_scheduling.py
index 307b6e66682f6..5d64a41fbc9a5 100644
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -4,6 +4,7 @@ from itertools import repeat
 from typing import Any
 
 import pytest
+import torch
 import torch._dynamo.config as dynamo_config
 
 from vllm import SamplingParams
@@ -158,6 +159,7 @@ def run_tests(
             m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
         # lock matmul precision to full FP32 (IEEE)
         m.setenv("VLLM_FLOAT32_MATMUL_PRECISION", "ieee")
+        torch.backends.cuda.matmul.allow_tf32 = False
         # m.setenv("VLLM_BATCH_INVARIANT", "1")
         outputs: list[tuple[str, list, list]] = []
         for n, (