From bbc1b29665ad8b1574ccfcfab7af7da492aa4d3b Mon Sep 17 00:00:00 2001
From: cong-meta <prowindy@hotmail.com>
Date: Fri, 17 Oct 2025 01:53:06 -0700
Subject: [PATCH 1/2] Update troubleshooting.md and remind VLLM_TRACE_FUNCTION
 usage (#27069)

Signed-off-by: cong-meta <prowindy@hotmail.com>
---
 docs/usage/troubleshooting.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage/troubleshooting.md b/docs/usage/troubleshooting.md
index f8337a596f290..042b83056cfb5 100644
--- a/docs/usage/troubleshooting.md
+++ b/docs/usage/troubleshooting.md
@@ -38,7 +38,7 @@ If other strategies don't solve the problem, it's likely that the vLLM instance
 - `export VLLM_LOG_STATS_INTERVAL=1.` to get log statistics more frequently for tracking running queue, waiting queue and cache hit states.
 - `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
 - `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
-- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. Do not use this flag unless absolutely needed for debugging, it will cause significant delays in startup time.
+- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. (WARNING: This flag slows down token generation by **over 100x**. Do not use it unless absolutely needed for debugging.)
 
 ## Breakpoints
 

From e20eba753bbced43837aa92f747e6c50ee36ce09 Mon Sep 17 00:00:00 2001
From: Mengqing Cao <cmq0113@163.com>
Date: Fri, 17 Oct 2025 17:00:30 +0800
Subject: [PATCH 2/2] [VLM][Refactor] Remove useless func `get_input_positions`
 in `MRotaryEmbedding` (#27088)

Signed-off-by: MengqingCao <cmq0113@163.com>
---
 .../layers/rotary_embedding/mrope.py          | 34 -------------------
 1 file changed, 34 deletions(-)

diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py
index 5cae3d9b80fa7..d269733083d83 100644
--- a/vllm/model_executor/layers/rotary_embedding/mrope.py
+++ b/vllm/model_executor/layers/rotary_embedding/mrope.py
@@ -4,7 +4,6 @@
 
 import numpy as np
 import torch
-from transformers import PretrainedConfig
 
 from vllm.triton_utils import tl, triton
 
@@ -376,39 +375,6 @@ class MRotaryEmbedding(RotaryEmbedding):
     ) -> tuple[torch.Tensor, torch.Tensor | None]:
         return self.forward_native(positions, query, key, offsets)
 
-    @classmethod
-    def get_input_positions(
-        cls,
-        input_tokens: list[int],
-        hf_config: PretrainedConfig,
-        image_grid_thw: list[list[int]] | torch.Tensor | None,
-        video_grid_thw: list[list[int]] | torch.Tensor | None,
-        second_per_grid_ts: list[float] | None,
-        context_len: int = 0,
-        seq_len: int | None = None,
-        audio_feature_lengths: torch.Tensor | None = None,
-        use_audio_in_video: bool = False,
-    ) -> tuple[list[list[int]], int]:
-        """Get mrope input positions and delta value."""
-
-        image_grid_thw = [] if image_grid_thw is None else image_grid_thw
-        video_grid_thw = [] if video_grid_thw is None else video_grid_thw
-        second_per_grid_ts = [] if second_per_grid_ts is None else second_per_grid_ts
-
-        llm_positions, mrope_position_delta = cls.get_input_positions_tensor(
-            input_tokens=input_tokens,
-            hf_config=hf_config,
-            image_grid_thw=image_grid_thw,
-            video_grid_thw=video_grid_thw,
-            second_per_grid_ts=second_per_grid_ts,
-            context_len=context_len,
-            seq_len=seq_len,
-            audio_feature_lengths=audio_feature_lengths,
-            use_audio_in_video=use_audio_in_video,
-        )
-
-        return llm_positions.tolist(), mrope_position_delta
-
     @staticmethod
     def get_next_input_positions(
         mrope_position_delta: int,