From 458e63a2c6b18e7febfa30cecb59461f96fbe324 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Mon, 13 Jan 2025 20:59:09 +0800
Subject: [PATCH] [platform] add device_control env var (#12009)

Signed-off-by: youkaichao
---
 vllm/platforms/cuda.py      |  1 +
 vllm/platforms/hpu.py       |  1 +
 vllm/platforms/interface.py | 10 ++++++++++
 vllm/platforms/neuron.py    |  1 +
 vllm/platforms/rocm.py      |  2 ++
 vllm/platforms/tpu.py       |  1 +
 vllm/platforms/xpu.py       |  1 +
 7 files changed, 17 insertions(+)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 3f77ec50ed31f..b6a6c461369f9 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -78,6 +78,7 @@ class CudaPlatformBase(Platform):
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
     ray_device_key: str = "GPU"
+    device_control_env_var: str = "CUDA_VISIBLE_DEVICES"
 
     @classmethod
     def get_device_capability(cls,
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index 0acb2804a5f66..b579ebf494bdc 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -20,6 +20,7 @@ class HpuPlatform(Platform):
     device_type: str = "hpu"
     dispatch_key: str = "HPU"
     ray_device_key: str = "HPU"
+    device_control_env_var: str = "HABANA_VISIBLE_MODULES"
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index ec917f75689dd..afa9daa9c98a7 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -78,20 +78,30 @@ class Platform:
     _enum: PlatformEnum
     device_name: str
     device_type: str
+
     # available dispatch keys:
     # check https://github.com/pytorch/pytorch/blob/313dac6c1ca0fa0cde32477509cce32089f8532a/torchgen/model.py#L134 # noqa
     # use "CPU" as a fallback for platforms not registered in PyTorch
     dispatch_key: str = "CPU"
+
     # available ray device keys:
     # https://github.com/ray-project/ray/blob/10ba5adadcc49c60af2c358a33bb943fb491a171/python/ray/_private/ray_constants.py#L438 # noqa
     # empty string means the device does not support ray
     ray_device_key: str = ""
+
+    # platform-agnostic way to specify the device control environment variable,
+    # e.g. CUDA_VISIBLE_DEVICES for CUDA.
+    # hint: search for "get_visible_accelerator_ids_env_var" in
+    # https://github.com/ray-project/ray/tree/master/python/ray/_private/accelerators # noqa
+    device_control_env_var: str = "VLLM_DEVICE_CONTROL_ENV_VAR_PLACEHOLDER"
+
     # The torch.compile backend for compiling simple and
     # standalone functions. The default value is "inductor" to keep
     # the same behavior as PyTorch.
     # NOTE: for the forward part of the model, vLLM has another separate
     # compilation strategy.
     simple_compile_backend: str = "inductor"
+
     supported_quantization: list[str] = []
 
     def is_cuda(self) -> bool:
diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py
index 7f4a867b32ba1..0696f73cc17b4 100644
--- a/vllm/platforms/neuron.py
+++ b/vllm/platforms/neuron.py
@@ -18,6 +18,7 @@ class NeuronPlatform(Platform):
     device_type: str = "neuron"
     ray_device_key: str = "neuron_cores"
     supported_quantization: list[str] = ["neuron_quant"]
+    device_control_env_var: str = "NEURON_RT_VISIBLE_CORES"
 
     @classmethod
     def get_device_name(cls, device_id: int = 0) -> str:
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index f12e948113723..7f1e8aef528a6 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -65,6 +65,8 @@ class RocmPlatform(Platform):
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
     ray_device_key: str = "GPU"
+    # rocm shares the same device control env var as CUDA
+    device_control_env_var: str = "CUDA_VISIBLE_DEVICES"
 
     supported_quantization: list[str] = [
         "awq", "gptq", "fp8", "compressed_tensors", "compressed-tensors",
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index 460eb170bba34..ff9487daac7a7 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -20,6 +20,7 @@ class TpuPlatform(Platform):
     device_type: str = "tpu"
     dispatch_key: str = "XLA"
     ray_device_key: str = "TPU"
+    device_control_env_var: str = "TPU_VISIBLE_CHIPS"
 
     supported_quantization: list[str] = [
         "tpu_int8", "compressed-tensors", "compressed_tensors"
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index cb74f79b31794..f34376b44e689 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -22,6 +22,7 @@ class XPUPlatform(Platform):
     # Intel XPU's device key is "GPU" for Ray.
     # see https://github.com/ray-project/ray/blob/6a5eb5865eeb9ccf058a79b44f107e327e360673/python/ray/_private/accelerators/intel_gpu.py#L20 # noqa: E501
     ray_device_key: str = "GPU"
+    device_control_env_var: str = "ONEAPI_DEVICE_SELECTOR"
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
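
The sketch below is not part of the patch; it only illustrates how the new Platform.device_control_env_var attribute could be consumed by launcher code to restrict a worker process to specific devices in a platform-agnostic way. The helper name restrict_visible_devices is hypothetical, and the comma-separated value format assumed here matches CUDA-style variables (ONEAPI_DEVICE_SELECTOR, for instance, expects a different syntax).

# Illustrative sketch, not part of this patch.
import os

from vllm.platforms import current_platform


def restrict_visible_devices(device_ids: list[int]) -> None:
    # Look up the platform's device control env var added by this patch,
    # e.g. CUDA_VISIBLE_DEVICES on CUDA/ROCm, HABANA_VISIBLE_MODULES on HPU,
    # NEURON_RT_VISIBLE_CORES on Neuron, TPU_VISIBLE_CHIPS on TPU.
    env_var = current_platform.device_control_env_var
    # Assumes a CUDA-style comma-separated id list as the value format.
    os.environ[env_var] = ",".join(str(i) for i in device_ids)


# Make only devices 0 and 1 visible to this process, whatever the platform.
restrict_visible_devices([0, 1])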