misc cleanups to prepare for rebase

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore 2025-06-02 18:00:56 +00:00
parent 8f592524cb
commit 90e46ee5e3
2 changed files with 1 addition and 11 deletions

View File

@ -49,7 +49,6 @@ from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader, maybe_remap_kv_scale_name)
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.sequence import IntermediateTensors
from vllm.v1.worker.ubatching import get_current_ubatch_context
from .interfaces import SupportsPP
from .utils import (PPMissingLayer, is_pp_missing_parameter,
@ -564,8 +563,6 @@ class DeepseekV2DecoderLayer(nn.Module):
hidden_states: torch.Tensor,
residual: Optional[torch.Tensor],
) -> torch.Tensor:
# if (ubatch_ctx := get_current_ubatch_context()) is not None:
# print("in decoder, ubatch:", ubatch_ctx.id)
# Self Attention
if residual is None:
residual = hidden_states
@ -659,9 +656,6 @@ class DeepseekV2Model(nn.Module):
intermediate_tensors: Optional[IntermediateTensors],
inputs_embeds: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, IntermediateTensors]:
# if (ubatch_ctx := get_current_ubatch_context()) is not None:
# print("in forward, ubatch:", ubatch_ctx.id)
if get_pp_group().is_first_rank:
if inputs_embeds is not None:
hidden_states = inputs_embeds

View File

@ -1417,9 +1417,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
if model_input.attn_metadata is not None:
model_input.attn_metadata.enable_kv_scales_calculation = False
import nvtx
with nvtx.annotate("execute_model"):
self.execute_model(model_input, kv_caches, intermediate_tensors)
self.execute_model(model_input, kv_caches, intermediate_tensors)
torch.cuda.synchronize()
if self.lora_config:
self._remove_dummy_loras()
@ -2125,8 +2123,6 @@ class CUDAGraphRunner(nn.Module):
) -> torch.Tensor:
attn_metadata: AttentionMetadata = get_forward_context().attn_metadata
print("=== CUDAGraphRunner forward ===")
# Copy the input tensors to the input buffers.
self.input_buffers["input_ids"].copy_(input_ids, non_blocking=True)
if positions is not None: