[docs][misc] IOProcessor plugins fixes (#24046)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
This commit is contained in:
Christian Pinto 2025-09-01 17:17:41 +01:00 committed by GitHub
parent 39a22dcaac
commit cf91a89dd2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 14 additions and 23 deletions

View File

@@ -64,9 +64,9 @@ The `parse_request` method is used for validating the user prompt and converting
The `pre_process*` methods take the validated plugin input to generate vLLM's model prompts for regular inference.
The `post_process*` methods take `PoolingRequestOutput` objects as input and generate a custom plugin output.
The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is [here](../../vllm/entrypoints/openai/serving_pooling_with_io_plugin.py).
The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is available here <gh-file:vllm/entrypoints/openai/serving_pooling_with_io_plugin.py>.
An example implementation of a plugin that enables generating geotiff images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please, also refer to our [online](../../examples/online_serving/prithvi_geospatial_mae.py) and [offline](../../examples/offline_inference/prithvi_geospatial_mae_io_processor.py) inference examples.
An example implementation of a plugin that enables generating geotiff images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please, also refer to our online (<gh-file:examples/online_serving/prithvi_geospatial_mae.py>) and offline (<gh-file:examples/offline_inference/prithvi_geospatial_mae_io_processor.py>) inference examples.
## Using an IO Processor plugin

View File

@@ -33,6 +33,7 @@ def main():
},
"priority": 0,
"model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
"softmax": False,
}
ret = requests.post(server_endpoint, json=request_payload_url)

View File

@@ -8,7 +8,7 @@ import datetime
import os
import tempfile
import urllib.request
from collections.abc import AsyncGenerator, Sequence
from collections.abc import Sequence
from typing import Any, Optional, Union
import albumentations
@@ -359,14 +359,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):
return prompts
async def pre_process_async(
self,
prompt: IOProcessorInput,
request_id: Optional[str] = None,
**kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
return self.pre_process(prompt, request_id, **kwargs)
def post_process(
self,
model_output: Sequence[PoolingRequestOutput],
@@ -421,15 +413,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):
data=out_data,
request_id=request_id)
async def post_process_async(
self,
model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
request_id: Optional[str] = None,
**kwargs,
) -> IOProcessorOutput:
collected_output = [item async for i, item in model_output]
return self.post_process(collected_output, request_id, **kwargs)
class PrithviMultimodalDataProcessorIndia(PrithviMultimodalDataProcessor):

View File

@@ -113,6 +113,7 @@ async def test_prithvi_mae_plugin_online(
},
"priority": 0,
"model": model_name,
"softmax": False
}
ret = requests.post(

View File

@@ -1424,9 +1424,10 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
When using IOProcessor plugins, the actual input is processed
by the plugin itself. Hence, we use a generic type for the request data
"""
softmax: bool = True
def to_pooling_params(self):
return PoolingParams(task="encode")
return PoolingParams(task="encode", softmax=self.softmax)
class IOProcessorResponse(OpenAIBaseModel, Generic[T]):

View File

@@ -49,7 +49,12 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
request_id: Optional[str] = None,
**kwargs,
) -> IOProcessorOutput:
collected_output = [item async for i, item in model_output]
# We cannot guarantee outputs are returned in the same order they were
# fed to vLLM.
# Let's sort them by id before post_processing
sorted_output = sorted([(i, item) async for i, item in model_output],
key=lambda output: output[0])
collected_output = [output[1] for output in sorted_output]
return self.post_process(collected_output, request_id, **kwargs)
@abstractmethod
@@ -59,4 +64,4 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
@abstractmethod
def output_to_response(
self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
raise NotImplementedError
raise NotImplementedError