Reconstructed from a copy of this patch whose line breaks had been collapsed.
NOTE(review): context-line indentation and blank context lines are inferred
from the hunk line counts -- verify against the upstream files before applying.

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index aeba1ee553a5..58f9944b08aa 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -50,7 +50,7 @@ jobs:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
           pytorch-version: ['2.1.0']
-          cuda-version: ['12.1']
+          cuda-version: ['11.8', '12.1']
 
     steps:
       - name: Checkout
diff --git a/setup.py b/setup.py
index 660b5196cfd9..806a7192ac9c 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,8 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
 
 ROOT_DIR = os.path.dirname(__file__)
 
+MAIN_CUDA_VERSION = "12.1"
+
 # Supported NVIDIA GPU architectures.
 SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
 
@@ -225,7 +227,7 @@ def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)
 
 
-def find_version(filepath: str):
+def find_version(filepath: str) -> str:
     """Extract version information from the given filepath.
 
     Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py
@@ -238,6 +240,15 @@ def find_version(filepath: str):
         raise RuntimeError("Unable to find version string.")
 
 
+def get_vllm_version() -> str:
+    version = find_version(get_path("vllm", "__init__.py"))
+    cuda_version = str(nvcc_cuda_version)
+    if cuda_version != MAIN_CUDA_VERSION:
+        cuda_version_str = cuda_version.replace(".", "")[:3]
+        version += f"+cu{cuda_version_str}"
+    return version
+
+
 def read_readme() -> str:
     """Read the README file if present."""
     p = get_path("README.md")
@@ -256,7 +267,7 @@ def get_requirements() -> List[str]:
 
 setuptools.setup(
     name="vllm",
-    version=find_version(get_path("vllm", "__init__.py")),
+    version=get_vllm_version(),
     author="vLLM Team",
     license="Apache 2.0",
     description=("A high-throughput and memory-efficient inference and "