[Bugfix] Flush TunableOp results before worker processes are destroyed. (#13623)

Signed-off-by: Nichols A. Romero <nick.romero@amd.com>
2025-12-10 06:15:01 +08:00 · 2025-02-25 05:08:20 -06:00 · 2025-02-25 05:08:20 -06:00 · fa82074167
commit fa82074167
parent 75e9d49796
1 changed files with 9 additions and 0 deletions
--- a/vllm/executor/multiproc_worker_utils.py
+++ b/vllm/executor/multiproc_worker_utils.py
@@ -250,6 +250,15 @@ def _run_worker_process(
    except Exception:
        logger.exception("Worker failed")

+    # Flush TunableOp results when TunableOp is enabled and online
+    # (in situ) tuning is enabled, i.e. untuned ops are not being
+    # recorded for offline tuning. Note that the offline tuning API
+    # (record_untuned_is_enabled()) requires PyTorch 2.6 or later.
+    import torch.cuda.tunable as tunable
+    if (tunable.is_enabled() and tunable.tuning_is_enabled()
+            and not tunable.record_untuned_is_enabled()):
+        tunable.write_file()
+
    logger.info("Worker exiting")