[Bugfix] Flush TunableOp results before worker processes are destroyed. (#13623)

Signed-off-by: Nichols A. Romero <nick.romero@amd.com>
2025-12-10 12:35:51 +08:00 · 2025-02-25 05:08:20 -06:00 · 2025-02-25 05:08:20 -06:00 · fa82074167
commit fa82074167
parent 75e9d49796
1 changed files with 9 additions and 0 deletions
--- a/vllm/executor/multiproc_worker_utils.py
+++ b/vllm/executor/multiproc_worker_utils.py
@@ -250,6 +250,15 @@ def _run_worker_process(
    except Exception:
        logger.exception("Worker failed")
    # Flush TunableOp results when TunableOp is enabled and online
    # (in situ) tuning is active, i.e. untuned ops are not merely being
    # recorded for offline tuning.
    # Note: the offline tuning API (record_untuned_is_enabled()) is only
    # available in PyTorch 2.6 or later.
    import torch.cuda.tunable as tunable
    if (tunable.is_enabled() and tunable.tuning_is_enabled()
            and not tunable.record_untuned_is_enabled()):
        tunable.write_file()
    logger.info("Worker exiting")