diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index 0390773783961..f337bc9b0f7ba 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -1389,6 +1389,22 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         """Given the HF-processed data, output the metadata of each field."""
         raise NotImplementedError
 
+    @abstractmethod  # NOTE(review): verify all subclasses override this
+    def get_num_mm_encoder_tokens(
+        self,
+        num_image_tokens: int,
+    ) -> int:
+        """Map an image-token count to the multi-modal encoder token count."""
+        raise NotImplementedError
+
+    @abstractmethod  # NOTE(review): verify all subclasses override this
+    def get_num_mm_connector_tokens(
+        self,
+        num_vision_tokens: int,  # NOTE(review): same unit as image tokens? confirm
+    ) -> int:
+        """Map a vision-token count to the multi-modal connector token count."""
+        raise NotImplementedError
+
     @abstractmethod
     def _get_prompt_updates(
         self,