From e9ba99f296e77038f59f59e33579f4011ed56411 Mon Sep 17 00:00:00 2001 From: Shanshan Shen <467638484@qq.com> Date: Mon, 7 Apr 2025 19:06:24 +0800 Subject: [PATCH] [V1][Structured Output] Add `supports_structured_output()` method to Platform (#16148) Signed-off-by: shen-shanshan <467638484@qq.com> --- vllm/platforms/cpu.py | 4 ++++ vllm/platforms/cuda.py | 4 ++++ vllm/platforms/hpu.py | 4 ++++ vllm/platforms/interface.py | 7 +++++++ vllm/platforms/neuron.py | 4 ++++ vllm/platforms/rocm.py | 4 ++++ vllm/platforms/tpu.py | 5 +++++ vllm/platforms/xpu.py | 4 ++++ vllm/v1/engine/processor.py | 8 +++++--- 9 files changed, 41 insertions(+), 3 deletions(-) diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 67466bdb98075..cfd7bc2a40571 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -180,3 +180,7 @@ class CpuPlatform(Platform): Get device specific communicator class for distributed communication. """ return "vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator" # noqa + + @classmethod + def supports_structured_output(cls) -> bool: + return True diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 0576022be448b..053cf74ebceb0 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -308,6 +308,10 @@ class CudaPlatformBase(Platform): def supports_v1(cls, model_config: ModelConfig) -> bool: return True + @classmethod + def supports_structured_output(cls) -> bool: + return True + @classmethod def use_custom_allreduce(cls) -> bool: return True diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py index 4c842b5251105..f011f14029a39 100644 --- a/vllm/platforms/hpu.py +++ b/vllm/platforms/hpu.py @@ -92,3 +92,7 @@ class HpuPlatform(Platform): @classmethod def get_device_communicator_cls(cls) -> str: return "vllm.distributed.device_communicators.hpu_communicator.HpuCommunicator" # noqa + + @classmethod + def supports_structured_output(cls) -> bool: + return True diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index b6f6029de9c82..2bb543bd73f70 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -379,6 +379,13 @@ class Platform: """ return False + @classmethod + def supports_structured_output(cls) -> bool: + """ + Returns whether the current platform can support structured output. + """ + return False + @classmethod def use_custom_allreduce(cls) -> bool: """ diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py index c1f426e5b8801..93657881cbdd8 100644 --- a/vllm/platforms/neuron.py +++ b/vllm/platforms/neuron.py @@ -67,3 +67,7 @@ class NeuronPlatform(Platform): @classmethod def use_all_gather(cls) -> bool: return True + + @classmethod + def supports_structured_output(cls) -> bool: + return True diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index d18b7c26f7ec5..a2fbf416ecf20 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -303,6 +303,10 @@ class RocmPlatform(Platform): # V1 support on AMD gpus is experimental return True + @classmethod + def supports_structured_output(cls) -> bool: + return True + @classmethod def use_custom_allreduce(cls) -> bool: # We only enable custom allreduce for MI300 series diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 43d3044cb93ee..eeadb4a71e5e7 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -133,3 +133,8 @@ class TpuPlatform(Platform): def supports_v1(cls, model_config: ModelConfig) -> bool: # V1 support on TPU is experimental return True + + @classmethod + def supports_structured_output(cls) -> bool: + # Structured output is not supported on TPU. + return False diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 225e756cd7ce8..c4bd639384a40 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -140,3 +140,7 @@ class XPUPlatform(Platform): @classmethod def get_device_communicator_cls(cls) -> str: return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa + + @classmethod + def supports_structured_output(cls) -> bool: + return True diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 0d2892837eb28..403edddfcbee6 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -136,9 +136,11 @@ class Processor: f" != {engine_level_backend}") else: params.guided_decoding.backend = engine_level_backend - import vllm.platforms - if vllm.platforms.current_platform.is_tpu(): - raise ValueError("Structured output is not supported on TPU.") + + from vllm.platforms import current_platform + if not current_platform.supports_structured_output(): + raise ValueError("Structured output is not supported on " + f"{current_platform.device_name}.") # Request content validation if engine_level_backend.startswith("xgrammar"):