diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 441ccde046eb9..40accfffe4f9d 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -233,7 +233,13 @@ class LlavaDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
 class LlavaProcessingInfo(BaseLlavaProcessingInfo):
 
     def get_hf_processor(self, **kwargs: object):
-        return self.ctx.get_hf_processor(LlavaProcessor, **kwargs)
+        hf_processor = self.ctx.get_hf_processor(LlavaProcessor, **kwargs)
+        # In case patch_size is omitted from `processor_config.json`
+        # e.g. for E5-V: https://huggingface.co/royokong/e5-v
+        if hf_processor.patch_size is None:
+            patch_size = self.get_vision_encoder_info().get_patch_size()
+            hf_processor.patch_size = patch_size
+        return hf_processor
 
 
 class BaseLlavaMultiModalProcessor(BaseMultiModalProcessor[_I]):
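
Illustrative note (not part of the patch): a minimal, standalone sketch of the fallback behavior introduced above, using hypothetical stand-in classes (FakeLlavaProcessor, FakeVisionEncoderInfo) in place of the real LlavaProcessor and vLLM vision encoder info. It only demonstrates the resolution order assumed by the change: use patch_size from the processor config when present, otherwise take it from the vision encoder.

# Sketch only; the Fake* classes below are hypothetical stand-ins,
# not actual vLLM or transformers APIs.
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeLlavaProcessor:
    # transformers leaves patch_size as None when `processor_config.json`
    # omits it (as happens for E5-V).
    patch_size: Optional[int] = None


@dataclass
class FakeVisionEncoderInfo:
    patch_size: int = 14  # e.g. a ViT with 14x14 patches

    def get_patch_size(self) -> int:
        return self.patch_size


def resolve_patch_size(processor: FakeLlavaProcessor,
                       encoder_info: FakeVisionEncoderInfo) -> FakeLlavaProcessor:
    # Same idea as the patch: fall back to the vision encoder's patch size
    # only when the processor config did not provide one.
    if processor.patch_size is None:
        processor.patch_size = encoder_info.get_patch_size()
    return processor


# Config omits patch_size -> falls back to the encoder's value.
assert resolve_patch_size(FakeLlavaProcessor(), FakeVisionEncoderInfo()).patch_size == 14
# Config provides patch_size -> kept as-is.
assert resolve_patch_size(FakeLlavaProcessor(patch_size=16),
                          FakeVisionEncoderInfo()).patch_size == 16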