From 26ddfa299cc2835ab5ac9d0e6f3ddba3b85e5db0 Mon Sep 17 00:00:00 2001 From: i-yuanyukun Date: Thu, 18 Dec 2025 17:02:39 +0800 Subject: [PATCH] [Chore] remove duplicate code --- vllm/v1/worker/gpu_model_runner.py | 56 ------------------------------ 1 file changed, 56 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index a09f292d98e14..2409c9071f94b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -642,25 +642,6 @@ class GPUModelRunner( with_stack=False, ) - profile_dir = ( - "./profiler_logs/attn" - if self.afd_config is not None and self.afd_config.afd_role == "attention" - else "./profiler_logs/normal" - ) - self.profiler = torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA, - ], - schedule=torch.profiler.schedule( - wait=6000 + 4000, warmup=1, active=30, repeat=1 - ), - on_trace_ready=torch.profiler.tensorboard_trace_handler(profile_dir), - record_shapes=True, - profile_memory=False, - with_stack=False, - ) - def reset_mm_cache(self) -> None: if self.mm_budget: self.mm_budget.reset_cache() @@ -2988,38 +2969,6 @@ class GPUModelRunner( ) return afd_metadata - def _build_afd_metadata( - self, ubatch_slices: UBatchSlices | None, num_tokens_unpadded: int - ): - afd_metadata = None - if self.afd_config: - # For prefill, compute tokens per stage based on actual token - # counts - afd_tokens_start_loc = [0] - afd_tokens_lens = [] - if ubatch_slices and len(ubatch_slices) > 1: - afd_tokens_start_loc = [ub.token_slice.start for ub in ubatch_slices] - afd_reqs_start_loc = [ub.request_slice.start for ub in ubatch_slices] - logger.info( - f"afd_tokens_start_loc: {afd_tokens_start_loc} " - f"afd_reqs_start_loc: {afd_reqs_start_loc} " - f"ubatch_slices: {ubatch_slices}" - ) - afd_tokens_lens = [ub.num_tokens for ub in ubatch_slices] - else: - afd_tokens_start_loc = [0] - afd_reqs_start_loc = [0] - afd_tokens_lens = [num_tokens_unpadded] - afd_metadata = AFDMetadata( - afd_tokens_start_loc=afd_tokens_start_loc, - afd_reqs_start_loc=afd_reqs_start_loc, - afd_stage_idx=0, - afd_connector=self.afd_connector, - afd_tokens_lens=afd_tokens_lens, - num_of_stages=len(ubatch_slices) if ubatch_slices else 1, - ) - return afd_metadata - @torch.inference_mode() def execute_model( self, @@ -5573,11 +5522,6 @@ class GPUModelRunner( if hasattr(self, "afd_connector") and self.afd_connector: self.afd_connector.init_afd_connector() - def initialize_afd_connector(self) -> None: - """Initialize AFD connector if available.""" - if hasattr(self, "afd_connector") and self.afd_connector: - self.afd_connector.init_afd_connector() - def may_add_encoder_only_layers_to_kv_cache_config(self) -> None: """ Add encoder-only layers to the KV cache config.