From 90d6cf921fe623524f618740616a6cf494d4a8df Mon Sep 17 00:00:00 2001 From: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com> Date: Thu, 11 Dec 2025 13:00:15 -0800 Subject: [PATCH] [BugFix][MM]support VLLM_RANDOMIZE_DP_DUMMY_INPUTS (#30472) Signed-off-by: Xingyu Liu Co-authored-by: Cyrus Leung --- vllm/v1/worker/gpu_model_runner.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 0e2bf9df9a18f..40c8059f90d34 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import functools import gc import itertools import time @@ -3892,19 +3893,21 @@ class GPUModelRunner( return {} @contextmanager - def maybe_randomize_inputs(self, input_ids: torch.Tensor): + def maybe_randomize_inputs( + self, input_ids: torch.Tensor | None, inputs_embeds: torch.Tensor | None + ): """ Randomize input_ids if VLLM_RANDOMIZE_DP_DUMMY_INPUTS is set. This is to help balance expert-selection - during profile_run - during DP rank dummy run """ + dp_size = self.vllm_config.parallel_config.data_parallel_size randomize_inputs = envs.VLLM_RANDOMIZE_DP_DUMMY_INPUTS and dp_size > 1 if not randomize_inputs: yield - else: - import functools + elif input_ids is not None: @functools.cache def rand_input_ids() -> torch.Tensor: @@ -3912,13 +3915,27 @@ class GPUModelRunner( self.input_ids.gpu, low=0, high=self.model_config.get_vocab_size(), - dtype=input_ids.dtype, ) - logger.debug_once("Randomizing dummy data for DP Rank") + logger.debug_once("Randomizing dummy input_ids for DP Rank") input_ids.copy_(rand_input_ids()[: input_ids.size(0)], non_blocking=True) yield input_ids.fill_(0) + else: + + @functools.cache + def rand_inputs_embeds() -> torch.Tensor: + return torch.randn_like( + self.inputs_embeds.gpu, + ) + + assert inputs_embeds is not None + logger.debug_once("Randomizing dummy inputs_embeds for DP Rank") + inputs_embeds.copy_( + rand_inputs_embeds()[: inputs_embeds.size(0)], non_blocking=True + ) + yield + inputs_embeds.fill_(0) def _get_mm_dummy_batch( self, @@ -4167,7 +4184,7 @@ class GPUModelRunner( num_tokens_across_dp[:] = num_tokens_padded with ( - self.maybe_randomize_inputs(input_ids), + self.maybe_randomize_inputs(input_ids, inputs_embeds), set_forward_context( attn_metadata, self.vllm_config,