mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 12:25:41 +08:00
[MISC][V1] Handle exception of current_platform.get_device_name() in arg_utils (#14379)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
parent
90e88ab756
commit
485afdd3cb
@ -1440,8 +1440,19 @@ class EngineArgs:
|
||||
# When there is no user override, set the default values based on the usage
|
||||
# context.
|
||||
# Use different default values for different hardware.
|
||||
from vllm.platforms import current_platform
|
||||
device_name = current_platform.get_device_name().lower()
|
||||
|
||||
# Try to query the device name on the current platform. If it fails,
|
||||
# the platform that imports vLLM may differ from the platform that
|
||||
# vLLM actually runs on (e.g. when scaling vLLM with Ray) and may
|
||||
# have no GPUs itself. In that case we use the default
|
||||
# values for non-H100/H200 GPUs.
|
||||
try:
|
||||
from vllm.platforms import current_platform
|
||||
device_name = current_platform.get_device_name().lower()
|
||||
except Exception:
|
||||
# The device name is only used to set default_max_num_batched_tokens
|
||||
device_name = "no-device"
|
||||
|
||||
if "h100" in device_name or "h200" in device_name:
|
||||
# For H100 and H200, we use larger default values.
|
||||
default_max_num_batched_tokens = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user