From 26ddfa299cc2835ab5ac9d0e6f3ddba3b85e5db0 Mon Sep 17 00:00:00 2001
From: i-yuanyukun <i-yuanyukun@stepfun.com>
Date: Thu, 18 Dec 2025 17:02:39 +0800
Subject: [PATCH] [Chore] Remove duplicated profiler setup and AFD method definitions from gpu_model_runner.py

---
 vllm/v1/worker/gpu_model_runner.py | 56 ------------------------------
 1 file changed, 56 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index a09f292d98e14..2409c9071f94b 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -642,25 +642,6 @@ class GPUModelRunner(
             with_stack=False,
         )
 
-        profile_dir = (
-            "./profiler_logs/attn"
-            if self.afd_config is not None and self.afd_config.afd_role == "attention"
-            else "./profiler_logs/normal"
-        )
-        self.profiler = torch.profiler.profile(
-            activities=[
-                torch.profiler.ProfilerActivity.CPU,
-                torch.profiler.ProfilerActivity.CUDA,
-            ],
-            schedule=torch.profiler.schedule(
-                wait=6000 + 4000, warmup=1, active=30, repeat=1
-            ),
-            on_trace_ready=torch.profiler.tensorboard_trace_handler(profile_dir),
-            record_shapes=True,
-            profile_memory=False,
-            with_stack=False,
-        )
-
     def reset_mm_cache(self) -> None:
         if self.mm_budget:
             self.mm_budget.reset_cache()
@@ -2988,38 +2969,6 @@ class GPUModelRunner(
             )
         return afd_metadata
 
-    def _build_afd_metadata(
-        self, ubatch_slices: UBatchSlices | None, num_tokens_unpadded: int
-    ):
-        afd_metadata = None
-        if self.afd_config:
-            # For prefill, compute tokens per stage based on actual token
-            # counts
-            afd_tokens_start_loc = [0]
-            afd_tokens_lens = []
-            if ubatch_slices and len(ubatch_slices) > 1:
-                afd_tokens_start_loc = [ub.token_slice.start for ub in ubatch_slices]
-                afd_reqs_start_loc = [ub.request_slice.start for ub in ubatch_slices]
-                logger.info(
-                    f"afd_tokens_start_loc: {afd_tokens_start_loc} "
-                    f"afd_reqs_start_loc: {afd_reqs_start_loc} "
-                    f"ubatch_slices: {ubatch_slices}"
-                )
-                afd_tokens_lens = [ub.num_tokens for ub in ubatch_slices]
-            else:
-                afd_tokens_start_loc = [0]
-                afd_reqs_start_loc = [0]
-                afd_tokens_lens = [num_tokens_unpadded]
-            afd_metadata = AFDMetadata(
-                afd_tokens_start_loc=afd_tokens_start_loc,
-                afd_reqs_start_loc=afd_reqs_start_loc,
-                afd_stage_idx=0,
-                afd_connector=self.afd_connector,
-                afd_tokens_lens=afd_tokens_lens,
-                num_of_stages=len(ubatch_slices) if ubatch_slices else 1,
-            )
-        return afd_metadata
-
     @torch.inference_mode()
     def execute_model(
         self,
@@ -5573,11 +5522,6 @@ class GPUModelRunner(
         if hasattr(self, "afd_connector") and self.afd_connector:
             self.afd_connector.init_afd_connector()
 
-    def initialize_afd_connector(self) -> None:
-        """Initialize AFD connector if available."""
-        if hasattr(self, "afd_connector") and self.afd_connector:
-            self.afd_connector.init_afd_connector()
-
     def may_add_encoder_only_layers_to_kv_cache_config(self) -> None:
         """
         Add encoder-only layers to the KV cache config.