diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py index 0eade272111f..ee4c5c8bacaa 100644 --- a/vllm/entrypoints/openai/serving_pooling.py +++ b/vllm/entrypoints/openai/serving_pooling.py @@ -4,7 +4,7 @@ import asyncio import json import time -from collections.abc import AsyncGenerator +from collections.abc import AsyncGenerator, Sequence from typing import Final, cast import jinja2 @@ -122,6 +122,10 @@ class OpenAIServingPooling(OpenAIServing): engine_prompts = await self.io_processor.pre_process_async( prompt=validated_prompt, request_id=request_id ) + if not isinstance(engine_prompts, Sequence) or isinstance( + engine_prompts, (str, bytes, bytearray) + ): + engine_prompts = [engine_prompts] elif isinstance(request, PoolingChatRequest): error_check_ret = self._validate_chat_template(