mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-10 01:15:41 +08:00
[Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
3c2b2ccece
commit
17b4c6685c
@ -82,7 +82,7 @@ from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model
|
|||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
# For profile run
|
# For profile run
|
||||||
_MAX_FRAMES_PER_VIDEO = 600
|
_MAX_FRAMES_PER_VIDEO = 32
|
||||||
|
|
||||||
# === Vision Inputs === #
|
# === Vision Inputs === #
|
||||||
|
|
||||||
|
|||||||
@ -715,6 +715,18 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
|
|||||||
video_items.append(video_item)
|
video_items.append(video_item)
|
||||||
return video_items
|
return video_items
|
||||||
|
|
||||||
|
def get_dummy_processor_inputs(self, seq_len, mm_counts):
|
||||||
|
processor_inputs = super().get_dummy_processor_inputs(
|
||||||
|
seq_len, mm_counts)
|
||||||
|
# HACK(Isotr0py): We set do_resize to False here to reuse Qwen2-VL's
|
||||||
|
# profiling logic, which will be problematic for configurable mm
|
||||||
|
# profiling.
|
||||||
|
# TODO(Isotr0py): Switch to the implementation in
|
||||||
|
# https://github.com/vllm-project/vllm/pull/25557
|
||||||
|
# after supporting configurable mm profiling.
|
||||||
|
processor_inputs.hf_processor_mm_kwargs = {"do_resize": False}
|
||||||
|
return processor_inputs
|
||||||
|
|
||||||
|
|
||||||
class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
|
class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
|
||||||
):
|
):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user