commit 6fa78d8f23 (parent 9949aa2ef1)

[V0 deprecation] Remove platform v1 controlling interface (#25410)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
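In short: this commit deletes the per-platform V1 capability hooks. The `supports_v1()` / `default_v1()` pair on the `Platform` base class and every backend override (CPU, CUDA, ROCm, TPU, XPU) are removed, together with the `EngineArgs` checks and the `_warn_or_fallback` helper that consulted them, and a module-level test skip that depended on `supports_v1`. After this change, callers no longer ask the platform whether V1 is supported.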
@@ -13,7 +13,6 @@ from vllm import SamplingParams
 from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.inputs import PromptType
-from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.core_client import DPAsyncMPClient
@@ -29,10 +28,6 @@ engine_args = AsyncEngineArgs(
     data_parallel_size=DP_SIZE,
 )
 
-if not current_platform.supports_v1(engine_args.create_model_config()):
-    pytest.skip(reason="Requires V1-supporting platform.",
-                allow_module_level=True)
-
 
 async def generate(
     engine: AsyncLLM,
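For reference, the deleted test guard used pytest's module-level skip: calling `pytest.skip(..., allow_module_level=True)` at import time skips every test in the file during collection. A minimal self-contained sketch of that pattern (the boolean here is a hypothetical stand-in for the removed `current_platform.supports_v1(...)` probe):

```python
import pytest

# Hypothetical stand-in for the removed platform capability probe.
PLATFORM_SUPPORTS_V1 = False

if not PLATFORM_SUPPORTS_V1:
    # allow_module_level=True permits skip() outside a test function,
    # skipping the entire module at collection time.
    pytest.skip(reason="Requires V1-supporting platform.",
                allow_module_level=True)


def test_placeholder():
    # Never collected on platforms that fail the check above.
    assert True
```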
@@ -1502,12 +1502,6 @@ class EngineArgs:
             _raise_or_fallback(feature_name=name, recommend_to_remove=True)
             return False
 
-        # Platforms must decide if they can support v1 for this model
-        if not current_platform.supports_v1(model_config=model_config):
-            _raise_or_fallback(
-                feature_name=f"device type={current_platform.device_type}",
-                recommend_to_remove=False)
-            return False
         #############################################################
         # Experimental Features - allow users to opt in.
 
@@ -1524,12 +1518,6 @@ class EngineArgs:
                 recommend_to_remove=False)
             return False
 
-        # The platform may be supported on V1, but off by default for now.
-        if not current_platform.default_v1(  # noqa: SIM103
-                model_config=model_config) and _warn_or_fallback(
-                    current_platform.device_name):
-            return False
-
         if (current_platform.is_cpu()
                 and model_config.get_sliding_window() is not None):
             _raise_or_fallback(feature_name="sliding window (CPU backend)",
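The two blocks deleted above share one control-flow shape: probe the platform, then either raise (if the user explicitly set `VLLM_USE_V1=1`) or return `False` so the caller falls back to the V0 engine. A minimal sketch of that shape under stated assumptions — `_StubPlatform`, `is_v1_supported`, and the use of `os.environ` in place of `vllm.envs` are all illustrative, not vLLM's actual code:

```python
import os


class _StubPlatform:
    """Illustrative stand-in for vllm.platforms.current_platform."""
    device_type = "oddball"

    def supports_v1(self, model_config) -> bool:
        return False  # pretend this backend cannot run the V1 engine


def _raise_or_fallback(feature_name: str, recommend_to_remove: bool) -> None:
    # Mirrors the helper's contract as seen in the diff: hard-fail when the
    # user forced V1, otherwise let the caller fall back to V0.
    # (recommend_to_remove only tweaks the logged hint upstream; ignored here.)
    if os.environ.get("VLLM_USE_V1") == "1":
        raise NotImplementedError(
            f"VLLM_USE_V1=1 is not supported with {feature_name}.")


def is_v1_supported(platform, model_config) -> bool:
    if not platform.supports_v1(model_config):
        _raise_or_fallback(
            feature_name=f"device type={platform.device_type}",
            recommend_to_remove=False)
        return False
    return True


print(is_v1_supported(_StubPlatform(), model_config=None))  # -> False
```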
@@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool):
     logger.warning(msg)
 
 
-def _warn_or_fallback(feature_name: str) -> bool:
-    if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
-        logger.warning(
-            "Detected VLLM_USE_V1=1 with %s. Usage should "
-            "be considered experimental. Please report any "
-            "issues on Github.", feature_name)
-        should_exit = False
-    else:
-        logger.info(
-            "%s is experimental on VLLM_USE_V1=1. "
-            "Falling back to V0 Engine.", feature_name)
-        should_exit = True
-    return should_exit
-
-
 def human_readable_int(value):
     """Parse human-readable integers like '1k', '2M', etc.
     Including decimal values with decimal multipliers.
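This helper was only reachable from the `default_v1` check deleted in the previous hunk, so it goes with it. The interesting detail is `envs.is_set(...)`: it distinguishes an explicitly exported `VLLM_USE_V1` from the unset default, so only deliberate opt-ins got the "experimental" warning. A rough sketch of that distinction, assuming plain `os.environ` in place of `vllm.envs`:

```python
import os


def is_set(name: str) -> bool:
    # True only when the variable is explicitly present in the environment,
    # regardless of its value.
    return name in os.environ


def warn_or_fallback(feature_name: str) -> bool:
    """Return True when the caller should fall back to the V0 engine."""
    if is_set("VLLM_USE_V1") and os.environ["VLLM_USE_V1"] == "1":
        # Explicit opt-in: proceed on V1, but flag it as experimental.
        print(f"warning: {feature_name} on V1 is experimental")
        return False
    # No explicit opt-in: quietly fall back.
    print(f"{feature_name} is experimental on V1; falling back to V0")
    return True
```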
@@ -328,23 +328,6 @@ class CpuPlatform(Platform):
     def supports_structured_output(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return True
-
-    @classmethod
-    def default_v1(cls, model_config) -> bool:
-        """Returns whether the current platform can use v1 by default for the
-        supplied model configuration.
-        """
-        arch = cls.get_cpu_architecture()
-        return (cls.supports_v1(model_config)
-                and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
-                             CpuArchEnum.ARM, CpuArchEnum.S390X))
-
     @classmethod
     def opaque_attention_op(cls) -> bool:
         return True
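The deleted `CpuPlatform.default_v1` gated "V1 on by default" behind an architecture allowlist. A self-contained sketch of that membership test; `CpuArchEnum` and `get_cpu_architecture` are re-declared locally for illustration (the real ones live in `vllm.platforms`), and the mapping from `platform.machine()` strings is an assumption:

```python
import enum
import platform


class CpuArchEnum(enum.Enum):
    # Illustrative subset of the real enum in vllm.platforms.
    X86 = "x86"
    ARM = "arm"
    POWERPC = "powerpc"
    S390X = "s390x"
    UNKNOWN = "unknown"


def get_cpu_architecture() -> CpuArchEnum:
    machine = platform.machine().lower()
    if machine in ("x86_64", "amd64", "i386"):
        return CpuArchEnum.X86
    if machine in ("arm64", "aarch64"):
        return CpuArchEnum.ARM
    if machine.startswith("ppc"):
        return CpuArchEnum.POWERPC
    if machine == "s390x":
        return CpuArchEnum.S390X
    return CpuArchEnum.UNKNOWN


def default_v1() -> bool:
    # Same allowlist shape as the deleted method: V1 by default only on
    # architectures the CPU backend has been validated on.
    return get_cpu_architecture() in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
                                      CpuArchEnum.ARM, CpuArchEnum.S390X)


print(default_v1())
```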
@@ -384,10 +384,6 @@ class CudaPlatformBase(Platform):
     def supports_fp8(cls) -> bool:
         return cls.has_device_capability(89)
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
@@ -482,20 +482,6 @@ class Platform:
                 or parallel_config.distributed_executor_backend
                 == "external_launcher")
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        """Returns whether the current platform can support v1 for the supplied
-        model configuration.
-        """
-        return False
-
-    @classmethod
-    def default_v1(cls, model_config: ModelConfig) -> bool:
-        """
-        Returns whether the current platform supports v1 by default.
-        """
-        return cls.supports_v1(model_config)
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
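Seen against the subclass hunks elsewhere in this diff, the deleted base-class pair is a classic capability-query pattern: a conservative default in the base class, `default_v1` delegating to `supports_v1`, and per-backend overrides. A minimal sketch of the pattern — class names here are illustrative, not vLLM's:

```python
class BasePlatform:
    @classmethod
    def supports_v1(cls, model_config) -> bool:
        # Conservative base default: unknown platforms opt out.
        return False

    @classmethod
    def default_v1(cls, model_config) -> bool:
        # By default, "on by default" simply follows "supported".
        return cls.supports_v1(model_config)


class CudaLikePlatform(BasePlatform):
    @classmethod
    def supports_v1(cls, model_config) -> bool:
        return True


print(BasePlatform.default_v1(None))      # False
print(CudaLikePlatform.default_v1(None))  # True
```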
@@ -396,11 +396,6 @@ class RocmPlatform(Platform):
         else:
             return torch.float8_e4m3fn
 
-    @classmethod
-    def supports_v1(cls, model_config: "ModelConfig") -> bool:
-        # V1 support on AMD gpus is experimental
-        return True
-
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
@@ -174,11 +174,6 @@ class TpuPlatform(Platform):
     def use_all_gather(cls) -> bool:
         return True
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        # V1 support on TPU is experimental
-        return True
-
     @classmethod
     def validate_request(
         cls,
@@ -194,10 +194,6 @@ class XPUPlatform(Platform):
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
-    @classmethod
-    def supports_v1(cls, model_config: ModelConfig) -> bool:
-        return True
-
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()