diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f623809b69aa8..52fba207696b6 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1411,12 +1411,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         ubatch_slices[0] = (padded_first_ubatch_slice, padded_first_ubatch_slice)
         ubatch_slices[1] = (padded_second_ubatch_slice, padded_second_ubatch_slice)
-        # if (num_pad_tokens_first_ubatch > 0):
-        #     print(f"FIRST UBATCH PADDING {num_pad_tokens_first_ubatch} TOTAL: {max_tokens_across_dp_cpu} ORIGINAL{first_ubatch_num_tokens}")
-        # if (num_pad_tokens_second_ubatch > 0):
-        #     print(f"SECOND UBATCH PADDING {num_pad_tokens_second_ubatch} TOTAL: {max_tokens_across_dp_cpu} ORIGINAL{second_ubatch_num_tokens}")
-        # print(f"num padded tokens: {num_pad_tokens} num tokens tensor: {num_tokens_after_padding} first num_tokens: {first_ubatch_num_tokens} second num tokens {second_ubatch_num_tokens}")
-
 
     # This is where the second ubatch is adjusted to account for the padding.
     # Should be called after attention metadata creation. This just extends
     # the second ubatch slice out to the total number of tokens
@@ -1426,15 +1420,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         padded_second_ubatch_slice = slice(ubatch_slices[1][1].start,
                                            num_total_tokens)
         ubatch_slices[1] = (padded_second_ubatch_slice, padded_second_ubatch_slice)
-
-    # Returns num_padded_tokens. This is just a number that should be added to the
-    # current number of tokens. It is a sum of the number of padded tokens from DP
-    # padding along with the number of padded tokens from cudagraph padding.
-    # The second tensor object is None when DP is disabled. When DP is enabled.
-    # it contains the number of tokens on each dp rank
-    def compute_padding(self,) -> tuple[int, Optional[torch.Tensor]]:
-        return (0, torch.Tensor())
-
     def should_ubatch(self, should_ubatch: bool) -> bool:
         dp_size = self.vllm_config.parallel_config.data_parallel_size
         dp_rank = self.vllm_config.parallel_config.data_parallel_rank
@@ -1481,28 +1466,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             tokens_slice = slice(tokens_slice.start, tokens_slice.start + 1)
             num_tokens = 1
 
-        # if (self.use_cuda_graph
-        #         and num_tokens <= self.cudagraph_batch_sizes[-1]):
-        #     # Use piecewise CUDA graphs.
-        #     # Add padding to the batch size.
-        #     tokens_slice = \
-        #         slice(tokens_slice.start, tokens_slice.start+
-        #               self.vllm_config.pad_for_cudagraph(num_tokens))
-        # else:
-        #     # Eager mode.
-        #     # Pad tokens to multiple of tensor_parallel_size when
-        #     # enabled collective fusion for SP
-        #     tp_size = self.vllm_config.parallel_config.tensor_parallel_size
-        #     if self.vllm_config.compilation_config.pass_config. \
-        #             enable_sequence_parallelism and tp_size > 1:
-        #         from vllm.utils import round_up
-        #         tokens_slice = slice(
-        #             tokens_slice.start,
-        #             tokens_slice.start + round_up(num_tokens, tp_size))
-
-        # update num tokens for padding
-        # num_tokens = tokens_slice.stop - tokens_slice.start
-
         # _prepare_inputs may reorder the batch, so we must gather multi
         # modal outputs after that to ensure the correct order
         if self.is_multimodal_model:
@@ -1590,8 +1553,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
 
         def model_inputs(tokens_slice: slice, use_dummy_input: bool) -> tuple:
             if use_dummy_input:
-                # print("MAKING DUMMY BATCH")
-                # assert num_dummy_tokens == 1
                 return self._get_dummy_model_inputs(num_dummy_tokens)
             else:
                 assert scheduler_output is not None
@@ -1610,7 +1571,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                 num_tokens_across_dp=num_tokens_across_dp,
                 skip_cuda_graphs=skip_cuda_graphs
             )
-            # First get some inputs
+
            ubatch_metadata: list[UbatchMetadata] = []
            for i, (_, tokens_slice) in enumerate(ubatch_slices):
                input_ids, positions, inputs_embeds, intermediate_tensors = \
@@ -1683,9 +1644,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         # run micro-batched
         if ubatch_slices is not None:
             assert len(ubatch_slices) == 2, "Only two ubatches has been tested"
-            # num_tokens = ubatch_slices[1][1].stop
             print(f"RUNNING UBATCH {ubatch_slices} is_dummy_run: {is_dummy_run} num_tokens_across_dp{num_tokens_across_dp}")
-            # assert not is_dummy_run
+
             compute_stream = torch.cuda.current_stream()
             ubatch_metadata = _make_ubatch_metadata(
                 ubatch_slices=ubatch_slices,
@@ -1696,7 +1656,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                 skip_cuda_graphs=skip_cuda_graphs
             )
             return _run_ubatches(ubatch_metadata)
-        # run single batch
+        # run normal batch
         else:
             input_ids, positions, inputs_embeds, intermediate_tensors = \
                 model_inputs(slice(0, num_scheduled_tokens), is_dummy_run)
@@ -2038,7 +1998,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         if spec_decode_metadata is None:
             # input_ids can be None for multimodal models.
             target_token_ids = self.input_ids[:num_scheduled_tokens]
-            #TODO(sage) make sure this works with mrope
             # TODO(woosuk): Support M-RoPE.
             target_positions = self.positions[:num_scheduled_tokens]
             if self.use_aux_hidden_state_outputs:
@@ -2068,7 +2027,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                 num_tokens,
             )
             target_token_ids = self.input_ids[token_indices]
-            #TODO(sage) make sure this works with mrope
             # TODO(woosuk): Support M-RoPE.
             target_positions = self.positions[token_indices]
             if self.use_aux_hidden_state_outputs: