mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-07 02:07:04 +08:00
misc cleanups to prepare for rebase
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
8f592524cb
commit
90e46ee5e3
@ -49,7 +49,6 @@ from vllm.model_executor.model_loader.weight_utils import (
|
||||
default_weight_loader, maybe_remap_kv_scale_name)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.v1.worker.ubatching import get_current_ubatch_context
|
||||
|
||||
from .interfaces import SupportsPP
|
||||
from .utils import (PPMissingLayer, is_pp_missing_parameter,
|
||||
@ -564,8 +563,6 @@ class DeepseekV2DecoderLayer(nn.Module):
|
||||
hidden_states: torch.Tensor,
|
||||
residual: Optional[torch.Tensor],
|
||||
) -> torch.Tensor:
|
||||
# if (ubatch_ctx := get_current_ubatch_context()) is not None:
|
||||
# print("in decoder, ubatch:", ubatch_ctx.id)
|
||||
# Self Attention
|
||||
if residual is None:
|
||||
residual = hidden_states
|
||||
@ -659,9 +656,6 @@ class DeepseekV2Model(nn.Module):
|
||||
intermediate_tensors: Optional[IntermediateTensors],
|
||||
inputs_embeds: Optional[torch.Tensor] = None,
|
||||
) -> Union[torch.Tensor, IntermediateTensors]:
|
||||
# if (ubatch_ctx := get_current_ubatch_context()) is not None:
|
||||
# print("in forward, ubatch:", ubatch_ctx.id)
|
||||
|
||||
if get_pp_group().is_first_rank:
|
||||
if inputs_embeds is not None:
|
||||
hidden_states = inputs_embeds
|
||||
|
||||
@ -1417,9 +1417,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
|
||||
if model_input.attn_metadata is not None:
|
||||
model_input.attn_metadata.enable_kv_scales_calculation = False
|
||||
|
||||
import nvtx
|
||||
with nvtx.annotate("execute_model"):
|
||||
self.execute_model(model_input, kv_caches, intermediate_tensors)
|
||||
self.execute_model(model_input, kv_caches, intermediate_tensors)
|
||||
torch.cuda.synchronize()
|
||||
if self.lora_config:
|
||||
self._remove_dummy_loras()
|
||||
@ -2125,8 +2123,6 @@ class CUDAGraphRunner(nn.Module):
|
||||
) -> torch.Tensor:
|
||||
attn_metadata: AttentionMetadata = get_forward_context().attn_metadata
|
||||
|
||||
print("=== CUDAGraphRunner forward ===")
|
||||
|
||||
# Copy the input tensors to the input buffers.
|
||||
self.input_buffers["input_ids"].copy_(input_ids, non_blocking=True)
|
||||
if positions is not None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user