mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 05:04:58 +08:00
[bugfix] add supports_v1 platform interface (#15417)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
parent
5d8e1c9279
commit
5f063a80bd
@ -1666,9 +1666,8 @@ class EngineArgs:
|
|||||||
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
|
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# No support for device type other than CUDA, AMD (experimental) or
|
# Platforms must decide if they can support v1 for this model
|
||||||
# TPU (experimental) so far.
|
if not current_platform.supports_v1(model_config=model_config):
|
||||||
if not (current_platform.is_cuda_alike() or current_platform.is_tpu()):
|
|
||||||
_raise_or_fallback(
|
_raise_or_fallback(
|
||||||
feature_name=f"device type={current_platform.device_type}",
|
feature_name=f"device type={current_platform.device_type}",
|
||||||
recommend_to_remove=False)
|
recommend_to_remove=False)
|
||||||
|
|||||||
@ -20,8 +20,9 @@ from vllm.utils import import_pynvml
|
|||||||
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
|
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
else:
|
else:
|
||||||
|
ModelConfig = None
|
||||||
VllmConfig = None
|
VllmConfig = None
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -303,6 +304,10 @@ class CudaPlatformBase(Platform):
|
|||||||
def supports_fp8(cls) -> bool:
|
def supports_fp8(cls) -> bool:
|
||||||
return cls.has_device_capability(89)
|
return cls.has_device_capability(89)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supports_v1(cls, model_config: ModelConfig) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
# NVML utils
|
# NVML utils
|
||||||
# Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
|
# Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
|
||||||
|
|||||||
@ -12,9 +12,10 @@ import torch
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
from vllm.utils import FlexibleArgumentParser
|
from vllm.utils import FlexibleArgumentParser
|
||||||
else:
|
else:
|
||||||
|
ModelConfig = None
|
||||||
VllmConfig = None
|
VllmConfig = None
|
||||||
FlexibleArgumentParser = None
|
FlexibleArgumentParser = None
|
||||||
|
|
||||||
@ -371,6 +372,13 @@ class Platform:
|
|||||||
or parallel_config.distributed_executor_backend
|
or parallel_config.distributed_executor_backend
|
||||||
== "external_launcher")
|
== "external_launcher")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supports_v1(cls, model_config: ModelConfig) -> bool:
|
||||||
|
"""Returns whether the current platform can support v1 for the supplied
|
||||||
|
model configuration.
|
||||||
|
"""
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class UnspecifiedPlatform(Platform):
|
class UnspecifiedPlatform(Platform):
|
||||||
_enum = PlatformEnum.UNSPECIFIED
|
_enum = PlatformEnum.UNSPECIFIED
|
||||||
|
|||||||
@ -12,8 +12,9 @@ from vllm.logger import init_logger
|
|||||||
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
|
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
else:
|
else:
|
||||||
|
ModelConfig = None
|
||||||
VllmConfig = None
|
VllmConfig = None
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -249,3 +250,8 @@ class RocmPlatform(Platform):
|
|||||||
return torch.float8_e4m3fnuz
|
return torch.float8_e4m3fnuz
|
||||||
else:
|
else:
|
||||||
return torch.float8_e4m3fn
|
return torch.float8_e4m3fn
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supports_v1(cls, model_config: ModelConfig) -> bool:
|
||||||
|
# V1 support on AMD GPUs is experimental
|
||||||
|
return True
|
||||||
|
|||||||
@ -10,8 +10,9 @@ from vllm.logger import init_logger
|
|||||||
from .interface import Platform, PlatformEnum, _Backend
|
from .interface import Platform, PlatformEnum, _Backend
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
else:
|
else:
|
||||||
|
ModelConfig = None
|
||||||
VllmConfig = None
|
VllmConfig = None
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@ -127,3 +128,8 @@ class TpuPlatform(Platform):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def use_all_gather(cls) -> bool:
|
def use_all_gather(cls) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supports_v1(cls, model_config: ModelConfig) -> bool:
|
||||||
|
# V1 support on TPU is experimental
|
||||||
|
return True
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user