[docs][misc] IOProcessor plugins fixes (#24046)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
This commit is contained in:
Christian Pinto 2025-09-01 17:17:41 +01:00 committed by GitHub
parent 39a22dcaac
commit cf91a89dd2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 14 additions and 23 deletions

View File

@ -64,9 +64,9 @@ The `parse_request` method is used for validating the user prompt and converting
The `pre_process*` methods take the validated plugin input to generate vLLM's model prompts for regular inference. The `pre_process*` methods take the validated plugin input to generate vLLM's model prompts for regular inference.
The `post_process*` methods take `PoolingRequestOutput` objects as input and generate a custom plugin output. The `post_process*` methods take `PoolingRequestOutput` objects as input and generate a custom plugin output.
The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is [here](../../vllm/entrypoints/openai/serving_pooling_with_io_plugin.py). The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is available here <gh-file:vllm/entrypoints/openai/serving_pooling_with_io_plugin.py>.
An example implementation of a plugin that enables generating GeoTIFF images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please also refer to our [online](../../examples/online_serving/prithvi_geospatial_mae.py) and [offline](../../examples/offline_inference/prithvi_geospatial_mae_io_processor.py) inference examples. An example implementation of a plugin that enables generating GeoTIFF images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please also refer to our online (<gh-file:examples/online_serving/prithvi_geospatial_mae.py>) and offline (<gh-file:examples/offline_inference/prithvi_geospatial_mae_io_processor.py>) inference examples.
## Using an IO Processor plugin ## Using an IO Processor plugin

View File

@ -33,6 +33,7 @@ def main():
}, },
"priority": 0, "priority": 0,
"model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM", "model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
"softmax": False,
} }
ret = requests.post(server_endpoint, json=request_payload_url) ret = requests.post(server_endpoint, json=request_payload_url)

View File

@ -8,7 +8,7 @@ import datetime
import os import os
import tempfile import tempfile
import urllib.request import urllib.request
from collections.abc import AsyncGenerator, Sequence from collections.abc import Sequence
from typing import Any, Optional, Union from typing import Any, Optional, Union
import albumentations import albumentations
@ -359,14 +359,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):
return prompts return prompts
async def pre_process_async(
self,
prompt: IOProcessorInput,
request_id: Optional[str] = None,
**kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
return self.pre_process(prompt, request_id, **kwargs)
def post_process( def post_process(
self, self,
model_output: Sequence[PoolingRequestOutput], model_output: Sequence[PoolingRequestOutput],
@ -421,15 +413,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):
data=out_data, data=out_data,
request_id=request_id) request_id=request_id)
async def post_process_async(
self,
model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
request_id: Optional[str] = None,
**kwargs,
) -> IOProcessorOutput:
collected_output = [item async for i, item in model_output]
return self.post_process(collected_output, request_id, **kwargs)
class PrithviMultimodalDataProcessorIndia(PrithviMultimodalDataProcessor): class PrithviMultimodalDataProcessorIndia(PrithviMultimodalDataProcessor):

View File

@ -113,6 +113,7 @@ async def test_prithvi_mae_plugin_online(
}, },
"priority": 0, "priority": 0,
"model": model_name, "model": model_name,
"softmax": False
} }
ret = requests.post( ret = requests.post(

View File

@ -1424,9 +1424,10 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
When using IOProcessor plugins, the actual input is processed When using IOProcessor plugins, the actual input is processed
by the plugin itself. Hence, we use a generic type for the request data by the plugin itself. Hence, we use a generic type for the request data
""" """
softmax: bool = True
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams(task="encode") return PoolingParams(task="encode", softmax=self.softmax)
class IOProcessorResponse(OpenAIBaseModel, Generic[T]): class IOProcessorResponse(OpenAIBaseModel, Generic[T]):

View File

@ -49,7 +49,12 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
request_id: Optional[str] = None, request_id: Optional[str] = None,
**kwargs, **kwargs,
) -> IOProcessorOutput: ) -> IOProcessorOutput:
collected_output = [item async for i, item in model_output] # We cannot guarantee outputs are returned in the same order they were
# fed to vLLM.
# Let's sort them by id before post_processing
sorted_output = sorted([(i, item) async for i, item in model_output],
key=lambda output: output[0])
collected_output = [output[1] for output in sorted_output]
return self.post_process(collected_output, request_id, **kwargs) return self.post_process(collected_output, request_id, **kwargs)
@abstractmethod @abstractmethod
@ -59,4 +64,4 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
@abstractmethod @abstractmethod
def output_to_response( def output_to_response(
self, plugin_output: IOProcessorOutput) -> IOProcessorResponse: self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
raise NotImplementedError raise NotImplementedError