diff --git a/Dockerfile b/Dockerfile
index df79412bbeceb..21945cb0bd09c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -286,7 +286,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     else \
-        uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+        uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     fi

 ENV VLLM_USAGE_SOURCE production-docker-image
diff --git a/docs/source/features/quantization/bnb.md b/docs/source/features/quantization/bnb.md
index b81d89c457513..fc499e7692d98 100644
--- a/docs/source/features/quantization/bnb.md
+++ b/docs/source/features/quantization/bnb.md
@@ -9,7 +9,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal
 Below are the steps to utilize BitsAndBytes with vLLM.

 ```console
-pip install bitsandbytes>=0.45.0
+pip install bitsandbytes>=0.45.3
 ```

 vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint.
diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py
index 33c2ca93ffa17..1e8e7aa1b8c12 100644
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -155,12 +155,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
     def __init__(self, quant_config: BitsAndBytesConfig):
         try:
             import bitsandbytes
-            if bitsandbytes.__version__ < "0.45.0":
+            if bitsandbytes.__version__ < "0.45.3":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.0.")
+                                  "install bitsandbytes>=0.45.3.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.0 via "
-                              "`pip install bitsandbytes>=0.45.0` to use "
+            raise ImportError("Please install bitsandbytes>=0.45.3 via "
+                              "`pip install bitsandbytes>=0.45.3` to use "
                               "bitsandbytes quantizer.") from err

         self.quant_config = quant_config
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index b2ffca2a4b4dc..d3f7a26e7f9e0 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -862,12 +862,12 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         try:
             import bitsandbytes
-            if bitsandbytes.__version__ < "0.45.0":
+            if bitsandbytes.__version__ < "0.45.3":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.0.")
+                                  "install bitsandbytes>=0.45.3.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.0 via "
-                              "`pip install bitsandbytes>=0.45.0` to use "
+            raise ImportError("Please install bitsandbytes>=0.45.3 via "
+                              "`pip install bitsandbytes>=0.45.3` to use "
                               "bitsandbytes quantizer.") from err

         hf_weights_files, use_safetensors = self._prepare_weights(