Merge branch 'main' into fix_ut

This commit is contained in:
Fanli Lin 2025-10-17 17:05:59 +08:00 committed by GitHub
commit 6ab025c10a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 1 additions and 35 deletions

View File

@ -38,7 +38,7 @@ If other strategies don't solve the problem, it's likely that the vLLM instance
- `export VLLM_LOG_STATS_INTERVAL=1.` to emit log statistics more frequently, for tracking the running queue, the waiting queue, and cache hit rates.
- `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
- `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. Do not use this flag unless absolutely needed for debugging, it will cause significant delays in startup time.
- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. (WARNING: This flag will slow down the token generation by **over 100x**. Do not use unless absolutely needed.)
## Breakpoints

View File

@ -4,7 +4,6 @@
import numpy as np
import torch
from transformers import PretrainedConfig
from vllm.triton_utils import tl, triton
@ -376,39 +375,6 @@ class MRotaryEmbedding(RotaryEmbedding):
) -> tuple[torch.Tensor, torch.Tensor | None]:
return self.forward_native(positions, query, key, offsets)
@classmethod
def get_input_positions(
    cls,
    input_tokens: list[int],
    hf_config: PretrainedConfig,
    image_grid_thw: list[list[int]] | torch.Tensor | None,
    video_grid_thw: list[list[int]] | torch.Tensor | None,
    second_per_grid_ts: list[float] | None,
    context_len: int = 0,
    seq_len: int | None = None,
    audio_feature_lengths: torch.Tensor | None = None,
    use_audio_in_video: bool = False,
) -> tuple[list[list[int]], int]:
    """Compute mrope input positions and the mrope position delta.

    Thin list-returning wrapper around ``get_input_positions_tensor``:
    normalizes the optional multimodal metadata arguments, delegates to
    the tensor implementation, and converts the resulting positions
    tensor into nested Python lists for the caller.
    """
    # Treat ``None`` as "no multimodal items". Checked with ``is None``
    # (not truthiness) so an empty tensor argument passes through intact.
    if image_grid_thw is None:
        image_grid_thw = []
    if video_grid_thw is None:
        video_grid_thw = []
    if second_per_grid_ts is None:
        second_per_grid_ts = []

    positions, position_delta = cls.get_input_positions_tensor(
        input_tokens=input_tokens,
        hf_config=hf_config,
        image_grid_thw=image_grid_thw,
        video_grid_thw=video_grid_thw,
        second_per_grid_ts=second_per_grid_ts,
        context_len=context_len,
        seq_len=seq_len,
        audio_feature_lengths=audio_feature_lengths,
        use_audio_in_video=use_audio_in_video,
    )
    return positions.tolist(), position_delta
@staticmethod
def get_next_input_positions(
mrope_position_delta: int,