From cd16bcff1ea7373d706c69abe8976609d9854aa8 Mon Sep 17 00:00:00 2001
From: i-yuanyukun
Date: Thu, 18 Dec 2025 15:56:20 +0800
Subject: [PATCH] [Chore] resolve some bugs due to merge

---
 vllm/v1/worker/gpu_ffn_model_runner.py | 4 ++--
 vllm/v1/worker/gpu_model_runner.py     | 5 +++--
 vllm/v1/worker/gpu_ubatch_wrapper.py   | 4 ----
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/vllm/v1/worker/gpu_ffn_model_runner.py b/vllm/v1/worker/gpu_ffn_model_runner.py
index cb08c9c05ae58..cd9940ef5e7a7 100644
--- a/vllm/v1/worker/gpu_ffn_model_runner.py
+++ b/vllm/v1/worker/gpu_ffn_model_runner.py
@@ -220,7 +220,7 @@ class GPUFFNModelRunner(LoRAModelRunnerMixin):
                 hidden_states, dim=0
             )
             ffn_output = self.model.compute_ffn_output(
-                current_layer_idx, gathered_hidden_states
+                gathered_hidden_states, current_layer_idx
             )
             # Extract the output corresponding to current rank
             start_idx = hidden_states.shape[0] * get_tensor_model_parallel_rank()
@@ -229,7 +229,7 @@ class GPUFFNModelRunner(LoRAModelRunnerMixin):
         else:
             # Single TP case
             rank_ffn_output = self.model.compute_ffn_output(
-                current_layer_idx, hidden_states
+                hidden_states, current_layer_idx
             )
 
         return rank_ffn_output
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index fff775a9f8241..ed6b9cc98f3a2 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -3211,8 +3211,9 @@ class GPUModelRunner(
             record_function_or_nullcontext("gpu_model_runner: forward"),
             self.maybe_get_kv_connector_output(scheduler_output) as kv_connector_output,
         ):
-            logger.info(f"input_ids: {input_ids.shape}")
-            if inputs_embeds:
+            if input_ids is not None:
+                logger.info(f"input_ids: {input_ids.shape}")
+            if inputs_embeds is not None:
                 logger.info(f"inputs_embeds: {inputs_embeds.shape}")
             model_output = self._model_forward(
                 input_ids=input_ids,
diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py
index 8a1c9d90abbbf..9e17c718c5513 100644
--- a/vllm/v1/worker/gpu_ubatch_wrapper.py
+++ b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -127,10 +127,6 @@ class UBatchWrapper:
         comm_sms: int = envs.VLLM_DBO_COMM_SMS
         set_comm_sms = lambda sms: None
 
-        if (
-            vllm_config.parallel_config.enable_expert_parallel
-            and not vllm_config.afd_config
-        ):
         if (
             vllm_config.parallel_config.enable_expert_parallel
             and not vllm_config.afd_config
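
Note on the fixes above: the two gpu_ffn_model_runner.py hunks correct an
argument-order regression where compute_ffn_output was called as
(layer_idx, hidden_states) even though the hidden states belong first; the
gpu_model_runner.py hunk replaces the bare truthiness test
`if inputs_embeds:` with an explicit `is not None` check, since truthiness
on a multi-element tensor raises "Boolean value of Tensor with more than
one element is ambiguous"; and the gpu_ubatch_wrapper.py hunk drops a
duplicated `if (...):` header left behind by the merge, which had left the
first conditional with an empty body. Below is a minimal, self-contained
sketch of the assumed call contract; _FFNStub and its body are hypothetical
stand-ins for illustration, not vLLM's actual implementation:

import torch

class _FFNStub:
    # Assumed signature: hidden states first, layer index second,
    # matching the patched call sites above.
    def compute_ffn_output(
        self, hidden_states: torch.Tensor, layer_idx: int
    ) -> torch.Tensor:
        return hidden_states  # placeholder FFN body, for illustration only

model = _FFNStub()
hidden_states = torch.randn(4, 8)

# Patched argument order: tensor first, then the layer index.
out = model.compute_ffn_output(hidden_states, 0)

# Guarding with `is not None`, as the gpu_model_runner.py hunk does:
# a bare `if hidden_states:` raises RuntimeError for tensors with more
# than one element.
if hidden_states is not None:
    print(hidden_states.shape)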