From 1819fbda638d4e10512570ebad0e6b16661238d8 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Thu, 3 Jul 2025 21:58:46 +0800 Subject: [PATCH] [Quantization] Bump to use latest bitsandbytes (#20424) Signed-off-by: Jee Jee Li --- docker/Dockerfile | 2 +- docs/features/quantization/bnb.md | 2 +- requirements/nightly_torch_test.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 2 +- vllm/config.py | 2 +- vllm/model_executor/layers/quantization/bitsandbytes.py | 8 ++++---- vllm/model_executor/model_loader/bitsandbytes_loader.py | 8 ++++---- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d1009fb4fb18..ec18c45a096a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \ else \ - uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \ + uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.46.1' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \ fi ENV VLLM_USAGE_SOURCE production-docker-image diff --git a/docs/features/quantization/bnb.md b/docs/features/quantization/bnb.md index 5756fdb28837..ca13ee107ef4 100644 --- a/docs/features/quantization/bnb.md +++ b/docs/features/quantization/bnb.md @@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal Below are the steps to utilize BitsAndBytes with vLLM. ```bash -pip install bitsandbytes>=0.45.3 +pip install bitsandbytes>=0.46.1 ``` vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint. 
diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index fd0b0fac12a9..0bade084fdf6 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -34,7 +34,7 @@ tokenizers==0.21.1 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads. schemathesis>=3.39.15 # Required for openai schema test. # quantization -bitsandbytes>=0.45.3 +bitsandbytes>=0.46.1 buildkite-test-collector==0.1.9 diff --git a/requirements/test.in b/requirements/test.in index 85c96df8e8f4..5f8b97a0e341 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -39,7 +39,7 @@ tokenizers==0.21.1 huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads. schemathesis>=3.39.15 # Required for openai schema test. # quantization -bitsandbytes>=0.45.3 +bitsandbytes>=0.46.1 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index 16d8ee54adcf..f6f599df758f 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -45,7 +45,7 @@ backoff==2.2.1 # via # -r requirements/test.in # schemathesis -bitsandbytes==0.45.3 +bitsandbytes==0.46.1 # via -r requirements/test.in black==24.10.0 # via datamodel-code-generator diff --git a/vllm/config.py b/vllm/config.py index 5c19061a0d51..8a0080f79702 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -969,7 +969,7 @@ class ModelConfig: def _verify_bnb_config(self) -> None: """ - The current version of bitsandbytes (0.45.3) with 8-bit models does not + The current version of bitsandbytes (0.46.1) with 8-bit models does not yet support CUDA graph. # TODO Remove this when bitsandbytes supports. 
""" diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index 53ba84ea8e75..1ed3ef8d2173 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase): def __init__(self, quant_config: BitsAndBytesConfig): try: import bitsandbytes - if bitsandbytes.__version__ < "0.45.3": + if bitsandbytes.__version__ < "0.46.1": raise ImportError("bitsandbytes version is wrong. Please " - "install bitsandbytes>=0.45.3.") + "install bitsandbytes>=0.46.1.") except ImportError as err: - raise ImportError("Please install bitsandbytes>=0.45.3 via " - "`pip install bitsandbytes>=0.45.3` to use " + raise ImportError("Please install bitsandbytes>=0.46.1 via " + "`pip install bitsandbytes>=0.46.1` to use " "bitsandbytes quantizer.") from err self.quant_config = quant_config diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py index 0c46d170e88d..8e330f7eeaf4 100644 --- a/vllm/model_executor/model_loader/bitsandbytes_loader.py +++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py @@ -183,12 +183,12 @@ class BitsAndBytesModelLoader(BaseModelLoader): try: import bitsandbytes - if bitsandbytes.__version__ < "0.45.3": + if bitsandbytes.__version__ < "0.46.1": raise ImportError("bitsandbytes version is wrong. Please " - "install bitsandbytes>=0.45.3.") + "install bitsandbytes>=0.46.1.") except ImportError as err: - raise ImportError("Please install bitsandbytes>=0.45.3 via " - "`pip install bitsandbytes>=0.45.3` to use " + raise ImportError("Please install bitsandbytes>=0.46.1 via " + "`pip install bitsandbytes>=0.46.1` to use " "bitsandbytes quantizer.") from err hf_weights_files, use_safetensors = self._prepare_weights(