[docs][misc] IOProcessor plugins fixes (#24046)
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
parent 39a22dcaac
commit cf91a89dd2
@@ -64,9 +64,9 @@ The `parse_request` method is used for validating the user prompt and converting
 The `pre_process*` methods take the validated plugin input to generate vLLM's model prompts for regular inference.
 The `post_process*` methods take `PoolingRequestOutput` objects as input and generate a custom plugin output.

-The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is [here](../../vllm/entrypoints/openai/serving_pooling_with_io_plugin.py).
+The `output_to_response` method is used only for online serving and converts the plugin output to the `IOProcessorResponse` type that is then returned by the API Server. The implementation of the `/io_processor_pooling` serving endpoint is available here <gh-file:vllm/entrypoints/openai/serving_pooling_with_io_plugin.py>.

-An example implementation of a plugin that enables generating geotiff images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please, also refer to our [online](../../examples/online_serving/prithvi_geospatial_mae.py) and [offline](../../examples/offline_inference/prithvi_geospatial_mae_io_processor.py) inference examples.
+An example implementation of a plugin that enables generating geotiff images with the PrithviGeospatialMAE model is available [here](https://github.com/christian-pinto/prithvi_io_processor_plugin). Please, also refer to our online (<gh-file:examples/online_serving/prithvi_geospatial_mae.py>) and offline (<gh-file:examples/offline_inference/prithvi_geospatial_mae_io_processor.py>) inference examples.

 ## Using an IO Processor plugin
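For orientation, the sketch below mirrors the plugin surface described in the documentation above (`parse_request`, `pre_process`, `post_process`, `output_to_response`) as a standalone class. The input/output types and the method bodies are placeholders, not the real vLLM interface; a real plugin subclasses vLLM's `IOProcessor` base class and works with `PromptType` / `PoolingRequestOutput` objects, as in the code changes further down.

```python
# Structural sketch only: stand-in types instead of vLLM's IOProcessorInput /
# IOProcessorOutput generics, so the method flow can be read in isolation.
from dataclasses import dataclass
from typing import Any, Optional, Sequence


@dataclass
class MyPluginInput:          # hypothetical validated input type
    image_url: str


@dataclass
class MyPluginOutput:         # hypothetical plugin output type
    tiff_path: str


class MyIOProcessorSketch:
    def parse_request(self, request: Any) -> MyPluginInput:
        # Validate the raw user prompt and convert it to the plugin input.
        return MyPluginInput(image_url=str(request["data"]["url"]))

    def pre_process(self, prompt: MyPluginInput,
                    request_id: Optional[str] = None, **kwargs) -> Sequence[Any]:
        # Turn the validated plugin input into vLLM model prompt(s).
        return [{"prompt_token_ids": [0], "multi_modal_data": prompt.image_url}]

    def post_process(self, model_output: Sequence[Any],
                     request_id: Optional[str] = None, **kwargs) -> MyPluginOutput:
        # Turn PoolingRequestOutput-like objects into the custom plugin output.
        return MyPluginOutput(tiff_path="/tmp/prediction.tiff")

    def output_to_response(self, plugin_output: MyPluginOutput) -> dict:
        # Online serving only: wrap the plugin output for the API server.
        return {"request_id": None, "data": plugin_output.tiff_path}
```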
@@ -33,6 +33,7 @@ def main():
         },
         "priority": 0,
         "model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
+        "softmax": False,
     }

     ret = requests.post(server_endpoint, json=request_payload_url)
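The hunk above only shows the tail of the request payload, so here is a rough end-to-end sketch of the client call. The server URL and the contents of the `data` field are illustrative; the `data` schema is whatever the plugin's `parse_request` accepts (see the Prithvi plugin example for a concrete one). Only `priority`, `model`, and the new `softmax` field come from the diff.

```python
import requests

# Hypothetical server address; adjust to wherever vLLM is serving.
server_endpoint = "http://localhost:8000/io_processor_pooling"

request_payload_url = {
    # Plugin-specific input; the keys inside "data" are placeholders here and
    # must match what the IO Processor plugin's parse_request expects.
    "data": {
        "data": "https://example.com/input_tile.tiff",
        "data_format": "url",
    },
    "priority": 0,
    "model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
    "softmax": False,  # newly added knob, forwarded to PoolingParams
}

ret = requests.post(server_endpoint, json=request_payload_url)
ret.raise_for_status()
print(ret.json())
```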
@@ -8,7 +8,7 @@ import datetime
 import os
 import tempfile
 import urllib.request
-from collections.abc import AsyncGenerator, Sequence
+from collections.abc import Sequence
 from typing import Any, Optional, Union

 import albumentations
@@ -359,14 +359,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):

         return prompts

-    async def pre_process_async(
-        self,
-        prompt: IOProcessorInput,
-        request_id: Optional[str] = None,
-        **kwargs,
-    ) -> Union[PromptType, Sequence[PromptType]]:
-        return self.pre_process(prompt, request_id, **kwargs)
-
     def post_process(
         self,
         model_output: Sequence[PoolingRequestOutput],
@@ -421,15 +413,6 @@ class PrithviMultimodalDataProcessor(IOProcessor):
                                  data=out_data,
                                  request_id=request_id)

-    async def post_process_async(
-        self,
-        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
-        request_id: Optional[str] = None,
-        **kwargs,
-    ) -> IOProcessorOutput:
-        collected_output = [item async for i, item in model_output]
-        return self.post_process(collected_output, request_id, **kwargs)
-

 class PrithviMultimodalDataProcessorIndia(PrithviMultimodalDataProcessor):

@@ -113,6 +113,7 @@ async def test_prithvi_mae_plugin_online(
         },
         "priority": 0,
         "model": model_name,
+        "softmax": False
     }

     ret = requests.post(
@@ -1424,9 +1424,10 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
     When using plugins IOProcessor plugins, the actual input is processed
     by the plugin itself. Hence, we use a generic type for the request data
     """
+    softmax: bool = True

     def to_pooling_params(self):
-        return PoolingParams(task="encode")
+        return PoolingParams(task="encode", softmax=self.softmax)


 class IOProcessorResponse(OpenAIBaseModel, Generic[T]):
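To make the effect of the new field concrete, here is a minimal standalone sketch of the mapping added above. `PoolingParams` and `IOProcessorRequest` are reduced to plain dataclasses; only the `softmax` field and the `to_pooling_params` behaviour mirror the diff.

```python
from dataclasses import dataclass


@dataclass
class PoolingParams:          # reduced stand-in for vLLM's PoolingParams
    task: str
    softmax: bool = True


@dataclass
class IOProcessorRequest:     # reduced stand-in for the protocol model above
    softmax: bool = True      # new field; default keeps the previous behaviour

    def to_pooling_params(self) -> PoolingParams:
        # The client-supplied flag now reaches the pooler instead of being
        # silently dropped.
        return PoolingParams(task="encode", softmax=self.softmax)


params = IOProcessorRequest(softmax=False).to_pooling_params()
assert params.softmax is False
```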
@@ -49,7 +49,12 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
         request_id: Optional[str] = None,
         **kwargs,
     ) -> IOProcessorOutput:
-        collected_output = [item async for i, item in model_output]
+        # We cannot guarantee outputs are returned in the same order they were
+        # fed to vLLM.
+        # Let's sort them by id before post_processing
+        sorted_output = sorted([(i, item) async for i, item in model_output],
+                               key=lambda output: output[0])
+        collected_output = [output[1] for output in sorted_output]
         return self.post_process(collected_output, request_id, **kwargs)

     @abstractmethod
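The new default `post_process_async` above assumes the async generator yields (index, output) pairs that may arrive out of request order; sorting by the index restores the original ordering before post-processing. Below is a self-contained illustration of that pattern, with a fake generator standing in for vLLM's output stream.

```python
import asyncio
from collections.abc import AsyncGenerator


async def fake_model_output() -> AsyncGenerator[tuple[int, str], None]:
    # Simulate results completing out of request order.
    for i, item in [(2, "third"), (0, "first"), (1, "second")]:
        await asyncio.sleep(0)
        yield i, item


async def main() -> None:
    # Same pattern as the diff: collect (index, output) pairs, sort by index,
    # then strip the indices before handing the outputs to post_process.
    sorted_output = sorted([(i, item) async for i, item in fake_model_output()],
                           key=lambda output: output[0])
    collected_output = [output[1] for output in sorted_output]
    print(collected_output)  # ['first', 'second', 'third']


asyncio.run(main())
```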
@@ -59,4 +64,4 @@ class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
     @abstractmethod
     def output_to_response(
             self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
-        raise NotImplementedError
+        raise NotImplementedError