mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:45:36 +08:00
Build CUDA11.8 wheels for release (#1596)
This commit is contained in:
parent
8efe23f150
commit
fd58b73a40
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@ -50,7 +50,7 @@ jobs:
|
|||||||
os: ['ubuntu-20.04']
|
os: ['ubuntu-20.04']
|
||||||
python-version: ['3.8', '3.9', '3.10', '3.11']
|
python-version: ['3.8', '3.9', '3.10', '3.11']
|
||||||
pytorch-version: ['2.1.0']
|
pytorch-version: ['2.1.0']
|
||||||
cuda-version: ['12.1']
|
cuda-version: ['11.8', '12.1']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
|
|||||||
15
setup.py
15
setup.py
@ -12,6 +12,8 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
|
|||||||
|
|
||||||
ROOT_DIR = os.path.dirname(__file__)
|
ROOT_DIR = os.path.dirname(__file__)
|
||||||
|
|
||||||
|
# CUDA version treated as the default for versioning: get_vllm_version()
# appends a "+cu<digits>" suffix to the package version only when the nvcc
# CUDA version differs from this value.
MAIN_CUDA_VERSION = "12.1"
|
||||||
|
|
||||||
# Supported NVIDIA GPU architectures.
|
# Supported NVIDIA GPU architectures.
|
||||||
SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
|
SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
|
||||||
|
|
||||||
@ -225,7 +227,7 @@ def get_path(*filepath) -> str:
|
|||||||
return os.path.join(ROOT_DIR, *filepath)
|
return os.path.join(ROOT_DIR, *filepath)
|
||||||
|
|
||||||
|
|
||||||
def find_version(filepath: str):
|
def find_version(filepath: str) -> str:
|
||||||
"""Extract version information from the given filepath.
|
"""Extract version information from the given filepath.
|
||||||
|
|
||||||
Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py
|
Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py
|
||||||
@ -238,6 +240,15 @@ def find_version(filepath: str):
|
|||||||
raise RuntimeError("Unable to find version string.")
|
raise RuntimeError("Unable to find version string.")
|
||||||
|
|
||||||
|
|
||||||
|
def get_vllm_version() -> str:
    """Return the vLLM version string, tagged with the CUDA version if needed.

    The base version is extracted from ``vllm/__init__.py``. If the string
    form of ``nvcc_cuda_version`` is not equal to ``MAIN_CUDA_VERSION``, a
    ``+cu<digits>`` suffix is appended (for example ``+cu118`` for "11.8").
    """
    base_version = find_version(get_path("vllm", "__init__.py"))
    nvcc_version = str(nvcc_cuda_version)

    # Builds against the main CUDA version keep the plain version string.
    if nvcc_version == MAIN_CUDA_VERSION:
        return base_version

    # Drop the dots and keep at most three characters, e.g. "11.8" -> "118".
    cuda_tag = nvcc_version.replace(".", "")[:3]
    return f"{base_version}+cu{cuda_tag}"
|
||||||
|
|
||||||
|
|
||||||
def read_readme() -> str:
|
def read_readme() -> str:
|
||||||
"""Read the README file if present."""
|
"""Read the README file if present."""
|
||||||
p = get_path("README.md")
|
p = get_path("README.md")
|
||||||
@ -256,7 +267,7 @@ def get_requirements() -> List[str]:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="vllm",
|
name="vllm",
|
||||||
version=find_version(get_path("vllm", "__init__.py")),
|
version=get_vllm_version(),
|
||||||
author="vLLM Team",
|
author="vLLM Team",
|
||||||
license="Apache 2.0",
|
license="Apache 2.0",
|
||||||
description=("A high-throughput and memory-efficient inference and "
|
description=("A high-throughput and memory-efficient inference and "
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user