Introduce VLLM_CUDART_SO_PATH to allow users to specify the .so path (#12998)

Signed-off-by: Lu Fang <lufang@fb.com>
This commit is contained in:
Lu Fang 2025-02-12 09:06:13 -08:00 committed by GitHub
parent 82cabf53a3
commit 042c3419fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 1 deletions

View File

@ -5,12 +5,14 @@ convenient for use when we just need to call a few functions.
""" """
import ctypes import ctypes
import glob
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
# this line makes it possible to directly load `libcudart.so` using `ctypes` # this line makes it possible to directly load `libcudart.so` using `ctypes`
import torch # noqa import torch # noqa
import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
@ -60,6 +62,29 @@ def find_loaded_library(lib_name) -> Optional[str]:
return path return path
def get_cudart_lib_path_from_env() -> Optional[str]:
    """
    Locate the CUDA runtime library via the VLLM_CUDART_SO_PATH env var.

    On some systems find_loaded_library() may not work, so users are allowed
    to specify the library path explicitly through the environment variable
    VLLM_CUDART_SO_PATH. The value is passed to glob.glob(), so it may be
    either a plain path or a glob pattern (e.g. ".../libcudart.so*").

    Returns:
        The first matching file path in sorted order, or None when the
        variable is unset or nothing on disk matches it.
    """
    cudart_so_env = envs.VLLM_CUDART_SO_PATH
    if cudart_so_env is None:
        return None

    # glob.glob() yields matches in arbitrary order; sort so that the chosen
    # library is deterministic when a pattern matches several files.
    file_paths = sorted(glob.glob(cudart_so_env))
    if not file_paths:
        return None

    # Note the trailing space after "env var": the two literals are
    # concatenated, and without it the message runs the words together.
    logger.info(
        "Found cudart library at %s through env var "
        "VLLM_CUDART_SO_PATH=%s",
        file_paths[0],
        cudart_so_env,
    )
    return file_paths[0]
class CudaRTLibrary: class CudaRTLibrary:
exported_functions = [ exported_functions = [
# cudaError_t cudaSetDevice ( int device ) # cudaError_t cudaSetDevice ( int device )
@ -105,8 +130,13 @@ class CudaRTLibrary:
def __init__(self, so_file: Optional[str] = None): def __init__(self, so_file: Optional[str] = None):
if so_file is None: if so_file is None:
so_file = find_loaded_library("libcudart") so_file = find_loaded_library("libcudart")
if so_file is None:
so_file = get_cudart_lib_path_from_env()
assert so_file is not None, \ assert so_file is not None, \
"libcudart is not loaded in the current process" (
"libcudart is not loaded in the current process, "
"try setting VLLM_CUDART_SO_PATH"
)
if so_file not in CudaRTLibrary.path_to_library_cache: if so_file not in CudaRTLibrary.path_to_library_cache:
lib = ctypes.CDLL(so_file) lib = ctypes.CDLL(so_file)
CudaRTLibrary.path_to_library_cache[so_file] = lib CudaRTLibrary.path_to_library_cache[so_file] = lib

View File

@ -87,6 +87,7 @@ if TYPE_CHECKING:
VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False
VLLM_RAY_PER_WORKER_GPUS: float = 1.0 VLLM_RAY_PER_WORKER_GPUS: float = 1.0
VLLM_RAY_BUNDLE_INDICES: str = "" VLLM_RAY_BUNDLE_INDICES: str = ""
VLLM_CUDART_SO_PATH: Optional[str] = None
def get_default_cache_root(): def get_default_cache_root():
@ -572,6 +573,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# models the alignment is already naturally aligned to 256 bytes. # models the alignment is already naturally aligned to 256 bytes.
"VLLM_CUDA_MEM_ALIGN_KV_CACHE": "VLLM_CUDA_MEM_ALIGN_KV_CACHE":
lambda: bool(int(os.getenv("VLLM_CUDA_MEM_ALIGN_KV_CACHE", "1"))), lambda: bool(int(os.getenv("VLLM_CUDA_MEM_ALIGN_KV_CACHE", "1"))),
# On some systems, find_loaded_library() may not work, so we allow users to
# specify the path through the environment variable VLLM_CUDART_SO_PATH.
"VLLM_CUDART_SO_PATH":
lambda: os.getenv("VLLM_CUDART_SO_PATH", None),
} }
# end-env-vars-definition # end-env-vars-definition