Fix benchmark_moe.py tuning for CUDA devices (#14164)

2026-07-25 10:17:14 +08:00 · 2025-03-04 00:11:03 -05:00 · 2025-03-04 00:11:03 -05:00 · f78c0be80a
commit f78c0be80a
parent 66233af7b6
1 changed file with 3 additions and 1 deletion
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -2,6 +2,7 @@

 import argparse
 import time
+from contextlib import nullcontext
 from datetime import datetime
 from itertools import product
 from typing import Any, TypedDict
@@ -412,7 +413,8 @@ class BenchmarkWorker:
                                                   hidden_size, search_space,
                                                   is_fp16, topk)

-        with torch.cuda.device(self.device_id):
+        with torch.cuda.device(self.device_id) if current_platform.is_rocm(
+        ) else nullcontext():
            for config in tqdm(search_space):
                try:
                    kernel_time = benchmark_config(