diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index aeba1ee553a5..58f9944b08aa 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -50,7 +50,7 @@ jobs:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
           pytorch-version: ['2.1.0']
-          cuda-version: ['12.1']
+          cuda-version: ['11.8', '12.1']
 
     steps:
       - name: Checkout
diff --git a/setup.py b/setup.py
index 660b5196cfd9..806a7192ac9c 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,8 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
 
 ROOT_DIR = os.path.dirname(__file__)
 
+MAIN_CUDA_VERSION = "12.1"  # builds with this CUDA version get no "+cu..." version suffix
+
 # Supported NVIDIA GPU architectures.
 SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
 
@@ -225,7 +227,7 @@ def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)
 
 
-def find_version(filepath: str):
+def find_version(filepath: str) -> str:
     """Extract version information from the given filepath.
 
     Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py
@@ -238,6 +240,15 @@ def find_version(filepath: str):
         raise RuntimeError("Unable to find version string.")
 
 
+def get_vllm_version() -> str:  # package version, suffixed with "+cu<ver>" for non-main CUDA builds
+    version = find_version(get_path("vllm", "__init__.py"))  # base version from vllm/__init__.py
+    cuda_version = str(nvcc_cuda_version)  # nvcc_cuda_version is defined elsewhere in setup.py
+    if cuda_version != MAIN_CUDA_VERSION:  # main-CUDA builds keep the plain version string
+        cuda_version_str = cuda_version.replace(".", "")[:3]  # e.g. "11.8" -> "118"
+        version += f"+cu{cuda_version_str}"  # local version label, e.g. "+cu118"
+    return version
+
+
 def read_readme() -> str:
     """Read the README file if present."""
     p = get_path("README.md")
@@ -256,7 +267,7 @@ def get_requirements() -> List[str]:
 
 setuptools.setup(
     name="vllm",
-    version=find_version(get_path("vllm", "__init__.py")),
+    version=get_vllm_version(),
     author="vLLM Team",
     license="Apache 2.0",
     description=("A high-throughput and memory-efficient inference and "