diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
index d39acae0b043f..5514d7770cff8 100755
--- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
@@ -135,7 +135,7 @@ run_and_track_test 1 "test_compilation.py" \
 run_and_track_test 2 "test_basic.py" \
     "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
 run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
-    "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
+    "HF_HUB_DISABLE_XET=1 python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
 run_and_track_test 4 "test_quantization_accuracy.py" \
     "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
 run_and_track_test 5 "examples/offline_inference/tpu.py" \
diff --git a/tests/entrypoints/llm/test_accuracy.py b/tests/entrypoints/llm/test_accuracy.py
index 6c5706d163406..39bc8ab07d45f 100644
--- a/tests/entrypoints/llm/test_accuracy.py
+++ b/tests/entrypoints/llm/test_accuracy.py
@@ -73,9 +73,6 @@ def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch):
         if current_platform.is_tpu():
             # Limit compilation time for TPU V1
 
-            # xet doesn't work well for both Qwen/Qwen3-1.7B and
-            # google/gemma-3-1b-it
-            m.setenv("HF_HUB_DISABLE_XET", "1")
             more_args = "max_model_len=2048,max_num_seqs=64"
 
             # Add TP test (if provided)