[ROCm][CI][Bugfix] Disable Flash/MemEfficient SDP on ROCm to avoid HF Transformers accuracy issues (#29909)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-06-06 11:02:16 +08:00 · 2025-12-02 20:36:49 -06:00 · 2025-12-02 20:36:49 -06:00 · 506ed87e87
commit 506ed87e87
parent 4dd7978374
3 changed files with 22 additions and 7 deletions
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@ -65,7 +65,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
 COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
 COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
 COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
 # Centralized v1 package - copied to both test and final stages
 COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
 # -----------------------
@ -98,7 +97,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system hf_transfer
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
-# Copy in the v1 package
+# Copy in the v1 package (for python-only install test group)
 COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
 # Source code is used in the `python_only_compile.sh` test
@ -130,9 +129,6 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    && pip uninstall -y vllm \
    && uv pip install --system *.whl
 # Copy in the v1 package
 COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
 ARG COMMON_WORKDIR
 # Copy over the benchmark scripts as well
--- a/requirements/rocm-test.txt
+++ b/requirements/rocm-test.txt
@ -70,8 +70,8 @@ torchgeo==0.7.0
 mteb==2.1.2
 # Data processing
-xgrammar @ git+https://github.com/mlc-ai/xgrammar.git@eafd4db51b78acc64b3f0764ef27dfd206c28628
+xgrammar==0.1.27
-    # Test async scheduling
+# Test async scheduling
 # Utilities
 num2words==0.5.14
--- a/tests/models/multimodal/generation/conftest.py
+++ b/tests/models/multimodal/generation/conftest.py
@ -0,0 +1,19 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Pytest configuration for vLLM tests."""
 import torch
 from vllm.platforms import current_platform
 def pytest_configure(config):
    """Force the math SDP backend when the test session runs on ROCm.

    On ROCm the flash and memory-efficient scaled-dot-product attention
    backends are switched off and the math backend is switched on, to
    avoid HF Transformers accuracy issues. On any other platform nothing
    is changed.

    Args:
        config: The pytest config object passed to the hook; not used here.
    """
    if current_platform.is_rocm():
        sdp_backends = torch.backends.cuda
        # Same order as before: flash off, mem-efficient off, math on.
        sdp_backends.enable_flash_sdp(False)
        sdp_backends.enable_mem_efficient_sdp(False)
        sdp_backends.enable_math_sdp(True)