[V0 deprecation] Remove platform v1 controlling interface (#25410)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py 2025-09-23 09:48:12 +08:00 committed by GitHub
parent 9949aa2ef1
commit 6fa78d8f23
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 0 additions and 81 deletions

View File

@@ -13,7 +13,6 @@ from vllm import SamplingParams
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.inputs import PromptType
from vllm.platforms import current_platform
from vllm.sampling_params import RequestOutputKind
from vllm.v1.engine.async_llm import AsyncLLM
from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
data_parallel_size=DP_SIZE,
)
if not current_platform.supports_v1(engine_args.create_model_config()):
pytest.skip(reason="Requires V1-supporting platform.",
allow_module_level=True)
async def generate(
engine: AsyncLLM,

View File

@@ -1502,12 +1502,6 @@ class EngineArgs:
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
return False
# Platforms must decide if they can support v1 for this model
if not current_platform.supports_v1(model_config=model_config):
_raise_or_fallback(
feature_name=f"device type={current_platform.device_type}",
recommend_to_remove=False)
return False
#############################################################
# Experimental Features - allow users to opt in.
@@ -1524,12 +1518,6 @@ class EngineArgs:
recommend_to_remove=False)
return False
# The platform may be supported on V1, but off by default for now.
if not current_platform.default_v1( # noqa: SIM103
model_config=model_config) and _warn_or_fallback(
current_platform.device_name):
return False
if (current_platform.is_cpu()
and model_config.get_sliding_window() is not None):
_raise_or_fallback(feature_name="sliding window (CPU backend)",
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
logger.warning(msg)
def _warn_or_fallback(feature_name: str) -> bool:
if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
logger.warning(
"Detected VLLM_USE_V1=1 with %s. Usage should "
"be considered experimental. Please report any "
"issues on Github.", feature_name)
should_exit = False
else:
logger.info(
"%s is experimental on VLLM_USE_V1=1. "
"Falling back to V0 Engine.", feature_name)
should_exit = True
return should_exit
def human_readable_int(value):
"""Parse human-readable integers like '1k', '2M', etc.
Including decimal values with decimal multipliers.

View File

@@ -328,23 +328,6 @@ class CpuPlatform(Platform):
def supports_structured_output(cls) -> bool:
return True
@classmethod
def supports_v1(cls, model_config) -> bool:
"""Returns whether the current platform can support v1 for the supplied
model configuration.
"""
return True
@classmethod
def default_v1(cls, model_config) -> bool:
"""Returns whether the current platform can use v1 by default for the
supplied model configuration.
"""
arch = cls.get_cpu_architecture()
return (cls.supports_v1(model_config)
and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
CpuArchEnum.ARM, CpuArchEnum.S390X))
@classmethod
def opaque_attention_op(cls) -> bool:
return True

View File

@@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
def supports_fp8(cls) -> bool:
return cls.has_device_capability(89)
@classmethod
def supports_v1(cls, model_config: "ModelConfig") -> bool:
return True
@classmethod
def use_custom_allreduce(cls) -> bool:
return True

View File

@@ -482,20 +482,6 @@ class Platform:
or parallel_config.distributed_executor_backend
== "external_launcher")
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
"""Returns whether the current platform can support v1 for the supplied
model configuration.
"""
return False
@classmethod
def default_v1(cls, model_config: ModelConfig) -> bool:
"""
Returns whether the current platform supports v1 by default.
"""
return cls.supports_v1(model_config)
@classmethod
def use_custom_allreduce(cls) -> bool:
"""

View File

@@ -396,11 +396,6 @@ class RocmPlatform(Platform):
else:
return torch.float8_e4m3fn
@classmethod
def supports_v1(cls, model_config: "ModelConfig") -> bool:
# V1 support on AMD gpus is experimental
return True
@classmethod
def use_custom_allreduce(cls) -> bool:
# We only enable custom allreduce for MI300 series

View File

@@ -174,11 +174,6 @@ class TpuPlatform(Platform):
def use_all_gather(cls) -> bool:
return True
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
# V1 support on TPU is experimental
return True
@classmethod
def validate_request(
cls,

View File

@@ -194,10 +194,6 @@ class XPUPlatform(Platform):
def get_device_communicator_cls(cls) -> str:
return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
return True
@classmethod
def device_count(cls) -> int:
return torch.xpu.device_count()