From c428e8d80b2bc17b0a306d1e80c8e4567b9dd9f4 Mon Sep 17 00:00:00 2001
From: baonudesifeizhai <85092850+baonudesifeizhai@users.noreply.github.com>
Date: Thu, 13 Nov 2025 06:34:14 -0500
Subject: [PATCH] Fix io processor pooling #28273 (#28484)

Signed-off-by: baonudesifeizhai
---
 vllm/entrypoints/openai/serving_pooling.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 0eade272111f..ee4c5c8bacaa 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -4,7 +4,7 @@
 import asyncio
 import json
 import time
-from collections.abc import AsyncGenerator
+from collections.abc import AsyncGenerator, Sequence
 from typing import Final, cast
 
 import jinja2
@@ -122,6 +122,10 @@ class OpenAIServingPooling(OpenAIServing):
             engine_prompts = await self.io_processor.pre_process_async(
                 prompt=validated_prompt, request_id=request_id
             )
+            if not isinstance(engine_prompts, Sequence) or isinstance(
+                engine_prompts, (str, bytes, bytearray)
+            ):
+                engine_prompts = [engine_prompts]
         elif isinstance(request, PoolingChatRequest):
             error_check_ret = self._validate_chat_template(