mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 02:25:01 +08:00
[V0 deprecation] Remove platform v1 controlling interface (#25410)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
9949aa2ef1
commit
6fa78d8f23
@ -13,7 +13,6 @@ from vllm import SamplingParams
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.inputs import PromptType
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sampling_params import RequestOutputKind
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
from vllm.v1.engine.core_client import DPAsyncMPClient
|
||||
@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
|
||||
data_parallel_size=DP_SIZE,
|
||||
)
|
||||
|
||||
if not current_platform.supports_v1(engine_args.create_model_config()):
|
||||
pytest.skip(reason="Requires V1-supporting platform.",
|
||||
allow_module_level=True)
|
||||
|
||||
|
||||
async def generate(
|
||||
engine: AsyncLLM,
|
||||
|
||||
@ -1502,12 +1502,6 @@ class EngineArgs:
|
||||
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
|
||||
return False
|
||||
|
||||
# Platforms must decide if they can support v1 for this model
|
||||
if not current_platform.supports_v1(model_config=model_config):
|
||||
_raise_or_fallback(
|
||||
feature_name=f"device type={current_platform.device_type}",
|
||||
recommend_to_remove=False)
|
||||
return False
|
||||
#############################################################
|
||||
# Experimental Features - allow users to opt in.
|
||||
|
||||
@ -1524,12 +1518,6 @@ class EngineArgs:
|
||||
recommend_to_remove=False)
|
||||
return False
|
||||
|
||||
# The platform may be supported on V1, but off by default for now.
|
||||
if not current_platform.default_v1( # noqa: SIM103
|
||||
model_config=model_config) and _warn_or_fallback(
|
||||
current_platform.device_name):
|
||||
return False
|
||||
|
||||
if (current_platform.is_cpu()
|
||||
and model_config.get_sliding_window() is not None):
|
||||
_raise_or_fallback(feature_name="sliding window (CPU backend)",
|
||||
@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
|
||||
logger.warning(msg)
|
||||
|
||||
|
||||
def _warn_or_fallback(feature_name: str) -> bool:
    """Warn about experimental V1 usage, or signal a fallback to V0.

    Returns True when the caller should fall back to the V0 engine
    (VLLM_USE_V1 was not explicitly enabled by the user); returns
    False when the user forced V1 on, in which case only a warning
    about the experimental status of *feature_name* is emitted.
    """
    user_forced_v1 = envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1
    if user_forced_v1:
        # Explicit opt-in: proceed on V1, but flag it as experimental.
        logger.warning(
            "Detected VLLM_USE_V1=1 with %s. Usage should "
            "be considered experimental. Please report any "
            "issues on Github.", feature_name)
        return False
    # Not explicitly enabled: report the fallback and tell the caller
    # to bail out of V1.
    logger.info(
        "%s is experimental on VLLM_USE_V1=1. "
        "Falling back to V0 Engine.", feature_name)
    return True
|
||||
|
||||
|
||||
def human_readable_int(value):
|
||||
"""Parse human-readable integers like '1k', '2M', etc.
|
||||
Including decimal values with decimal multipliers.
|
||||
|
||||
@ -328,23 +328,6 @@ class CpuPlatform(Platform):
|
||||
def supports_structured_output(cls) -> bool:
|
||||
return True
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config) -> bool:
    """Report whether this platform can run the V1 engine for
    *model_config*.

    The CPU backend accepts V1 unconditionally; the model
    configuration argument is ignored here.
    """
    return True
|
||||
|
||||
@classmethod
def default_v1(cls, model_config) -> bool:
    """Report whether V1 is enabled by default on this CPU platform
    for *model_config*.

    V1 is on by default only when the model is V1-capable and the
    host CPU architecture is one of the known-good ones.
    """
    host_arch = cls.get_cpu_architecture()
    # Architectures on which V1 is turned on by default.
    v1_default_archs = (CpuArchEnum.X86, CpuArchEnum.POWERPC,
                        CpuArchEnum.ARM, CpuArchEnum.S390X)
    return cls.supports_v1(model_config) and host_arch in v1_default_archs
|
||||
|
||||
@classmethod
def opaque_attention_op(cls) -> bool:
    """Report that this platform's attention op is opaque (always True
    for the CPU backend)."""
    return True
|
||||
|
||||
@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
|
||||
def supports_fp8(cls) -> bool:
|
||||
return cls.has_device_capability(89)
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config: "ModelConfig") -> bool:
    """Report V1-engine support for *model_config* on CUDA.

    CUDA supports V1 for every model configuration, so the argument
    is not inspected.
    """
    return True
|
||||
|
||||
@classmethod
def use_custom_allreduce(cls) -> bool:
    """Report that the custom all-reduce kernel is enabled on CUDA."""
    return True
|
||||
|
||||
@ -482,20 +482,6 @@ class Platform:
|
||||
or parallel_config.distributed_executor_backend
|
||||
== "external_launcher")
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
    """Report whether this platform can run the V1 engine for the
    given model configuration.

    The base :class:`Platform` implementation is conservative and
    declares no V1 support; concrete platforms override this.
    """
    return False
|
||||
|
||||
@classmethod
def default_v1(cls, model_config: ModelConfig) -> bool:
    """Report whether V1 is on by default for this platform.

    The base implementation simply defers to ``cls.supports_v1``;
    subclasses override this to gate V1 behind an explicit opt-in.
    """
    return cls.supports_v1(model_config)
|
||||
|
||||
@classmethod
|
||||
def use_custom_allreduce(cls) -> bool:
|
||||
"""
|
||||
|
||||
@ -396,11 +396,6 @@ class RocmPlatform(Platform):
|
||||
else:
|
||||
return torch.float8_e4m3fn
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config: "ModelConfig") -> bool:
    """Report V1-engine support for *model_config* on ROCm.

    NOTE: V1 support on AMD GPUs is experimental.
    """
    return True
|
||||
|
||||
@classmethod
|
||||
def use_custom_allreduce(cls) -> bool:
|
||||
# We only enable custom allreduce for MI300 series
|
||||
|
||||
@ -174,11 +174,6 @@ class TpuPlatform(Platform):
|
||||
def use_all_gather(cls) -> bool:
|
||||
return True
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
    """Report V1-engine support for *model_config* on TPU.

    NOTE: V1 support on TPU is experimental.
    """
    return True
|
||||
|
||||
@classmethod
|
||||
def validate_request(
|
||||
cls,
|
||||
|
||||
@ -194,10 +194,6 @@ class XPUPlatform(Platform):
|
||||
def get_device_communicator_cls(cls) -> str:
|
||||
return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa
|
||||
|
||||
@classmethod
def supports_v1(cls, model_config: ModelConfig) -> bool:
    """Report that the XPU platform supports the V1 engine for any
    model configuration."""
    return True
|
||||
|
||||
@classmethod
def device_count(cls) -> int:
    """Return the number of XPU devices visible to this process,
    as reported by PyTorch."""
    return torch.xpu.device_count()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user