diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 942fd1973f4f..d44d6930c177 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -340,7 +340,8 @@ class RocmPlatform(Platform): else: parallel_config.worker_cls = "vllm.worker.worker.Worker" # Aiter rms norm perform best when CUDA Graph capture is enabled. - if use_v1 and use_aiter_rms_norm and not is_eager_execution: + if (use_v1 and use_aiter_rms_norm and not is_eager_execution + and "-rms_norm" not in compilation_config.custom_ops): compilation_config.custom_ops.append("+rms_norm") @classmethod