From 52a7d91980012e4e476e2ade43bbcce8ed7a91bd Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Fri, 3 Oct 2025 13:25:00 -0400
Subject: [PATCH] debug

Signed-off-by: Robert Shaw
---
 vllm/v1/worker/gpu_model_runner.py | 2 ++
 vllm/v1/worker/ubatch_splitting.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index ff95acf0c016..b6b16968523c 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2400,6 +2400,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         # Run the model.
         # Use persistent buffers for CUDA graphs.
+        logger.info(f"====== EXECUTE {ubatch_slices=}, {num_input_tokens=}, {num_tokens_across_dp=}")
         with (set_forward_context(
                 attn_metadata,
                 self.vllm_config,
@@ -3046,6 +3047,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             (1 token) and prefill (multiple tokens) requests.
             remove_lora: If False, dummy LoRAs are not destroyed after the run
         """
+        logger.info("====== DUMMY RUN")
         assert cudagraph_runtime_mode is None or \
             cudagraph_runtime_mode.valid_runtime_modes()
 
diff --git a/vllm/v1/worker/ubatch_splitting.py b/vllm/v1/worker/ubatch_splitting.py
index 7767750aa604..49fe4e6c43d8 100644
--- a/vllm/v1/worker/ubatch_splitting.py
+++ b/vllm/v1/worker/ubatch_splitting.py
@@ -167,6 +167,7 @@ def ubatch_split(
         num_tokens_unpadded,
         uniform_decode=uniform_decode,
     )
+    logger.info(f"==== {should_attempt_ubatching=}, {num_tokens_unpadded=}")
 
     # Don't microbatch unless every other DP worker is also microbatching
     should_ubatch, num_tokens_after_padding = get_dp_padding_ubatch(
@@ -175,6 +176,7 @@ def ubatch_split(
         should_attempt_ubatching,
         vllm_config,
     )
+    logger.info(f"==== {should_ubatch=}, {num_tokens_after_padding=}")
     if not should_ubatch:
         return (None, None)
 