commit 6fa78d8f23 (parent 9949aa2ef1)

[V0 deprecation] Remove platform v1 controlling interface (#25410)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
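In short: this commit deletes the per-platform V1 capability hooks. The `supports_v1()` / `default_v1()` pair on the `Platform` base class and every backend override (CPU, CUDA, ROCm, TPU, XPU) are removed, together with the `EngineArgs` checks and the `_warn_or_fallback` helper that consulted them, and a module-level test skip that depended on `supports_v1`. After this change, callers no longer ask the platform whether V1 is supported.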
@@ -13,7 +13,6 @@ from vllm import SamplingParams
 from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs import PromptType
-from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
     data_parallel_size=DP_SIZE,
 )
 
-if not current_platform.supports_v1(engine_args.create_model_config()):
-    pytest.skip(reason="Requires V1-supporting platform.",
-                allow_module_level=True)
-
 
 async def generate(
     engine: AsyncLLM,
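For reference, the deleted test guard used pytest's module-level skip: calling `pytest.skip(..., allow_module_level=True)` at import time skips every test in the file during collection. A minimal self-contained sketch of that pattern (the boolean here is a hypothetical stand-in for the removed `current_platform.supports_v1(...)` probe):

```python
import pytest

# Hypothetical stand-in for the removed platform capability probe.
PLATFORM_SUPPORTS_V1 = False

if not PLATFORM_SUPPORTS_V1:
    # allow_module_level=True permits skip() outside a test function,
    # skipping the entire module at collection time.
    pytest.skip(reason="Requires V1-supporting platform.",
                allow_module_level=True)


def test_placeholder():
    # Never collected on platforms that fail the check above.
    assert True
```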
@@ -1502,12 +1502,6 @@ class EngineArgs:
             _raise_or_fallback(feature_name=name, recommend_to_remove=True)
             return False
 
-        # Platforms must decide if they can support v1 for this model
-        if not current_platform.supports_v1(model_config=model_config):
-            _raise_or_fallback(
-                feature_name=f"device type={current_platform.device_type}",
-                recommend_to_remove=False)
-            return False
         #############################################################
         # Experimental Features - allow users to opt in.
 
@@ -1524,12 +1518,6 @@ class EngineArgs:
                 recommend_to_remove=False)
             return False
 
-        # The platform may be supported on V1, but off by default for now.
-        if not current_platform.default_v1(  # noqa: SIM103
-                model_config=model_config) and _warn_or_fallback(
-                    current_platform.device_name):
-            return False
-
         if (current_platform.is_cpu()
                 and model_config.get_sliding_window() is not None):
             _raise_or_fallback(feature_name="sliding window (CPU backend)",
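The two blocks deleted above share one control-flow shape: probe the platform, then either raise (if the user explicitly set `VLLM_USE_V1=1`) or return `False` so the caller falls back to the V0 engine. A minimal sketch of that shape under stated assumptions — `_StubPlatform`, `is_v1_supported`, and the use of `os.environ` in place of `vllm.envs` are all illustrative, not vLLM's actual code:

```python
import os


class _StubPlatform:
    """Illustrative stand-in for vllm.platforms.current_platform."""
    device_type = "oddball"

    def supports_v1(self, model_config) -> bool:
        return False  # pretend this backend cannot run the V1 engine


def _raise_or_fallback(feature_name: str, recommend_to_remove: bool) -> None:
    # Mirrors the helper's contract as seen in the diff: hard-fail when the
    # user forced V1, otherwise let the caller fall back to V0.
    # (recommend_to_remove only tweaks the logged hint upstream; ignored here.)
    if os.environ.get("VLLM_USE_V1") == "1":
        raise NotImplementedError(
            f"VLLM_USE_V1=1 is not supported with {feature_name}.")


def is_v1_supported(platform, model_config) -> bool:
    if not platform.supports_v1(model_config):
        _raise_or_fallback(
            feature_name=f"device type={platform.device_type}",
            recommend_to_remove=False)
        return False
    return True


print(is_v1_supported(_StubPlatform(), model_config=None))  # -> False
```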
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
     logger.warning(msg)
 
 
-def _warn_or_fallback(feature_name: str) -> bool:
-    if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
-        logger.warning(
-            "Detected VLLM_USE_V1=1 with %s. Usage should "
-            "be considered experimental. Please report any "
-            "issues on Github.", feature_name)
-        should_exit = False
-    else:
-        logger.info(
-            "%s is experimental on VLLM_USE_V1=1. "
-            "Falling back to V0 Engine.", feature_name)
-        should_exit = True
-    return should_exit
-
-
 def human_readable_int(value):
     """Parse human-readable integers like '1k', '2M', etc.
     Including decimal values with decimal multipliers.
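This helper was only reachable from the `default_v1` check deleted in the previous hunk, so it goes with it. The interesting detail is `envs.is_set(...)`: it distinguishes an explicitly exported `VLLM_USE_V1` from the unset default, so only deliberate opt-ins got the "experimental" warning. A rough sketch of that distinction, assuming plain `os.environ` in place of `vllm.envs`:

```python
import os


def is_set(name: str) -> bool:
    # True only when the variable is explicitly present in the environment,
    # regardless of its value.
    return name in os.environ


def warn_or_fallback(feature_name: str) -> bool:
    """Return True when the caller should fall back to the V0 engine."""
    if is_set("VLLM_USE_V1") and os.environ["VLLM_USE_V1"] == "1":
        # Explicit opt-in: proceed on V1, but flag it as experimental.
        print(f"warning: {feature_name} on V1 is experimental")
        return False
    # No explicit opt-in: quietly fall back.
    print(f"{feature_name} is experimental on V1; falling back to V0")
    return True
```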
@@ -328,23 +328,6 @@ class CpuPlatform(Platform):
     def supports_structured_output(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return True
-
-    @classmethod
-    def default_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can use v1 by default for the
-        supplied model configuration.
-        """
-        arch = cls.get_cpu_architecture()
-        return (cls.supports_v1(model_config)
-                and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
-                             CpuArchEnum.ARM, CpuArchEnum.S390X))
-
     @classmethod
     def opaque_attention_op(cls) -> bool:
         return True
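The deleted `CpuPlatform.default_v1` gated "V1 on by default" behind an architecture allowlist. A self-contained sketch of that membership test; `CpuArchEnum` and `get_cpu_architecture` are re-declared locally for illustration (the real ones live in `vllm.platforms`), and the mapping from `platform.machine()` strings is an assumption:

```python
import enum
import platform


class CpuArchEnum(enum.Enum):
    # Illustrative subset of the real enum in vllm.platforms.
    X86 = "x86"
    ARM = "arm"
    POWERPC = "powerpc"
    S390X = "s390x"
    UNKNOWN = "unknown"


def get_cpu_architecture() -> CpuArchEnum:
    machine = platform.machine().lower()
    if machine in ("x86_64", "amd64", "i386"):
        return CpuArchEnum.X86
    if machine in ("arm64", "aarch64"):
        return CpuArchEnum.ARM
    if machine.startswith("ppc"):
        return CpuArchEnum.POWERPC
    if machine == "s390x":
        return CpuArchEnum.S390X
    return CpuArchEnum.UNKNOWN


def default_v1() -> bool:
    # Same allowlist shape as the deleted method: V1 by default only on
    # architectures the CPU backend has been validated on.
    return get_cpu_architecture() in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
                                      CpuArchEnum.ARM, CpuArchEnum.S390X)


print(default_v1())
```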
@@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
@@ -482,20 +482,6 @@ class Platform:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return False
-
-    @classmethod
-    def default_v1(cls, model_config: ModelConfig) -> bool:
-        """
-        Returns whether the current platform supports v1 by default.
-        """
-        return cls.supports_v1(model_config)
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
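Seen against the subclass hunks elsewhere in this diff, the deleted base-class pair is a classic capability-query pattern: a conservative default in the base class, `default_v1` delegating to `supports_v1`, and per-backend overrides. A minimal sketch of the pattern — class names here are illustrative, not vLLM's:

```python
class BasePlatform:
    @classmethod
    def supports_v1(cls, model_config) -> bool:
        # Conservative base default: unknown platforms opt out.
        return False

    @classmethod
    def default_v1(cls, model_config) -> bool:
        # By default, "on by default" simply follows "supported".
        return cls.supports_v1(model_config)


class CudaLikePlatform(BasePlatform):
    @classmethod
    def supports_v1(cls, model_config) -> bool:
        return True


print(BasePlatform.default_v1(None))      # False
print(CudaLikePlatform.default_v1(None))  # True
```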
@@ -396,11 +396,6 @@ class RocmPlatform(Platform):
         else:
             return torch.float8_e4m3fn
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        # V1 support on AMD gpus is experimental
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
@@ -174,11 +174,6 @@ class TpuPlatform(Platform):
     def use_all_gather(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        # V1 support on TPU is experimental
-        return True
-
     @classmethod
     def validate_request(
         cls,
@@ -194,10 +194,6 @@ class XPUPlatform(Platform):
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        return True
-
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()