mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 23:35:01 +08:00
[V1][Misc] Avoid unnecessary log output (#13289)
This commit is contained in:
parent
69e1d23e1e
commit
2010f04c17
@@ -96,10 +96,12 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         self.mm_registry = MULTIMODAL_REGISTRY
         self.uses_mrope = model_config.uses_mrope
 
-        # NOTE: Initialized client is only used for processing dummy
-        # multimodal data into multimodal kwargs for GPU memory profiling.
-        # Only applicable to multimodal models with legacy input mapper.
-        self.mm_input_mapper_profiling = MMInputCacheClient(self.model_config)
-        self.mm_input_mapper_profiling.use_cache = False
+        if self.is_multimodal_model:
+            # NOTE: Initialized client is only used for processing dummy
+            # multimodal data into multimodal kwargs for GPU memory profiling.
+            # Only applicable to multimodal models with legacy input mapper.
+            self.mm_input_mapper_profiling = MMInputCacheClient(
+                self.model_config)
+            self.mm_input_mapper_profiling.use_cache = False
 
         encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
Loading…
x
Reference in New Issue
Block a user