Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[V0 Deprecation] Remove V0-only methods in multi-modal registry (#25362)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: yewentao256 <zhyanwentao@126.com>

parent: 1b3aa0f297
commit: 243c358fa8
@@ -209,7 +209,6 @@ def batch_make_video_embeddings(
             return visual(pixel_values_on_device,
                           grid_thw=video_grid_thw_on_device).cpu()
 
-    # V1 Test: this calls a V0 internal.
     video_embeds = torch.concat(llm.apply_model(get_image_embeds))
 
     # split into original batches
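The deleted comment only noted that this test reaches the model through a V0-era pathway; the call itself, llm.apply_model(get_image_embeds), stays. As a point of reference, here is a minimal self-contained sketch of that helper, assuming LLM.apply_model(fn) invokes fn on each worker's loaded model and returns the per-worker results as a list; the checkpoint name and the parameter-counting callback are illustrative, not taken from the test.

    # Sketch only (assumption): LLM.apply_model(fn) calls fn(model) on every
    # worker's underlying torch.nn.Module and returns a list of results.
    from vllm import LLM

    llm = LLM(model="Qwen/Qwen2-VL-2B-Instruct")  # illustrative checkpoint

    def count_params(model):
        # `model` is the loaded torch module; any read-only inspection works.
        return sum(p.numel() for p in model.parameters())

    # One entry per worker; with tensor_parallel_size=1 this is a single int.
    print(llm.apply_model(count_params))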
@@ -12,8 +12,7 @@ from vllm.transformers_utils.tokenizer import (AnyTokenizer,
                                                 cached_tokenizer_from_config)
 from vllm.utils import ClassRegistry
 
-from .cache import (BaseMultiModalProcessorCache,
-                    processor_only_cache_from_config)
+from .cache import BaseMultiModalProcessorCache
 from .processing import BaseMultiModalProcessor, BaseProcessingInfo
 from .profiling import (BaseDummyInputsBuilder, DummyDecoderData,
                         DummyEncoderData, MultiModalProfiler)
@@ -176,35 +175,6 @@ class MultiModalRegistry:
             if mm_limits[key] > 0
         }
 
-    # TODO: Remove once V0 is gone
-    def get_max_tokens_by_modality(
-        self,
-        model_config: "ModelConfig",
-    ) -> Mapping[str, int]:
-        """
-        Get the maximum number of tokens from each modality
-        for profiling the memory usage of a model.
-        """
-        cache = processor_only_cache_from_config(model_config, self)
-        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache)
-        max_tokens_per_item = self.get_max_tokens_per_item_by_modality(
-            model_config,
-            cache=cache,
-        )
-
-        return {
-            key: mm_limits[key] * max_tokens_per_mm_item
-            for key, max_tokens_per_mm_item in max_tokens_per_item.items()
-        }
-
-    # TODO: Remove once V0 is gone
-    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int:
-        """
-        Get the maximum number of multi-modal tokens
-        for profiling the memory usage of a model.
-        """
-        return sum(self.get_max_tokens_by_modality(model_config).values())
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
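For code that still needs the deleted helpers, their removed bodies show they were thin wrappers over methods that remain on MultiModalRegistry. A hedged migration sketch follows, assuming access to the global MULTIMODAL_REGISTRY and a vllm.config.ModelConfig instance; the two function names are my own, not a vLLM API.

    from vllm.multimodal import MULTIMODAL_REGISTRY

    def max_tokens_by_modality(model_config):
        # Same arithmetic as the deleted get_max_tokens_by_modality():
        # per-prompt item limit times max tokens per item, per modality.
        mm_limits = MULTIMODAL_REGISTRY.get_mm_limits_per_prompt(model_config)
        per_item = MULTIMODAL_REGISTRY.get_max_tokens_per_item_by_modality(
            model_config)
        return {key: mm_limits[key] * n for key, n in per_item.items()}

    def max_multimodal_tokens(model_config):
        # Same arithmetic as the deleted get_max_multimodal_tokens().
        return sum(max_tokens_by_modality(model_config).values())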