From 4dd79783744adbfdc86f9454bffb5a92715a7f61 Mon Sep 17 00:00:00 2001
From: Roger Wang
Date: Tue, 2 Dec 2025 18:33:45 -0800
Subject: [PATCH] [Bugfix] Fix regression on pooling models from PR#29621 (#29921)

Signed-off-by: Roger Wang
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
Notes:
    Adds a private EmbeddingItems._unwrap() helper that returns `item.media`
    when the item is a MediaWithBytes instance and returns the item unchanged
    otherwise. EmbeddingItems.get() now passes `self.data[index]` through
    this helper instead of returning it directly. get_count() and
    get_processor_data() appear only as unchanged context.

 vllm/multimodal/parse.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py
index 0d3b8289e4e1..650368dcb8fc 100644
--- a/vllm/multimodal/parse.py
+++ b/vllm/multimodal/parse.py
@@ -134,11 +134,17 @@ class EmbeddingItems(
     or a list of embedding tensors (one per item).
     """
 
+    def _unwrap(
+        self, item: torch.Tensor | MediaWithBytes[torch.Tensor]
+    ) -> torch.Tensor:
+        """Extract media from wrapper if present."""
+        return item.media if isinstance(item, MediaWithBytes) else item
+
     def get_count(self) -> int:
         return len(self.data)
 
     def get(self, index: int) -> torch.Tensor:
-        return self.data[index]
+        return self._unwrap(self.data[index])
 
     def get_processor_data(self) -> Mapping[str, object]:
         return {}