[Quantization] Bump to use latest bitsandbytes (#20424)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

parent 7f0367109e
commit 1819fbda63

@@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     else \
-        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.46.1' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     fi

 ENV VLLM_USAGE_SOURCE production-docker-image

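A quick way to confirm what a built image actually resolved (a hedged check, not part of the diff; run it inside the container):

```python
# Post-build sanity check: arm64 images keep the older 'bitsandbytes>=0.42.0'
# floor, while other platforms should now resolve to 0.46.1 or newer.
import platform

import bitsandbytes

print(platform.machine(), bitsandbytes.__version__)
```
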
@@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal
 Below are the steps to utilize BitsAndBytes with vLLM.

 ```bash
-pip install bitsandbytes>=0.45.3
+pip install bitsandbytes>=0.46.1
 ```

 vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint.

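The docs hunk mentions in-flight quantization without showing it; below is a minimal sketch. `quantization="bitsandbytes"` follows the vLLM API, while the model id is only illustrative:

```python
# Minimal sketch of in-flight BitsAndBytes quantization with vLLM.
from vllm import LLM

llm = LLM(
    model="huggyllama/llama-7b",   # illustrative: any unquantized HF checkpoint
    quantization="bitsandbytes",   # quantize weights on the fly at load time
)
outputs = llm.generate("Hello, my name is")
print(outputs[0].outputs[0].text)
```

For a pre-quantized checkpoint, the same `quantization="bitsandbytes"` argument applies; per the docs line above, vLLM reads the quantization config from the model's config file.
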
@@ -34,7 +34,7 @@ tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
-bitsandbytes>=0.45.3
+bitsandbytes>=0.46.1
 buildkite-test-collector==0.1.9


@@ -39,7 +39,7 @@ tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
-bitsandbytes>=0.45.3
+bitsandbytes==0.46.1
 buildkite-test-collector==0.1.9


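Note the two specifier styles in these hunks: one requirements file keeps a `>=0.46.1` floor while the test inputs pin `==0.46.1` (and the lockfile hunk below pins as well). A small sketch of the difference, using the `packaging` library (not part of the diff):

```python
# How the floor and the pin from this diff accept candidate versions.
from packaging.specifiers import SpecifierSet

floor = SpecifierSet(">=0.46.1")  # runtime dep: any newer release is accepted
pin = SpecifierSet("==0.46.1")    # test dep: exact version for reproducibility

print("0.46.1" in floor, "0.46.1" in pin)  # True True
print("0.47.0" in floor, "0.47.0" in pin)  # True False
```
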
@@ -45,7 +45,7 @@ backoff==2.2.1
     # via
     #   -r requirements/test.in
     #   schemathesis
-bitsandbytes==0.45.3
+bitsandbytes==0.46.1
     # via -r requirements/test.in
 black==24.10.0
     # via datamodel-code-generator

@@ -969,7 +969,7 @@ class ModelConfig:

     def _verify_bnb_config(self) -> None:
         """
-        The current version of bitsandbytes (0.45.3) with 8-bit models does not
+        The current version of bitsandbytes (0.46.1) with 8-bit models does not
         yet support CUDA graph.
         # TODO Remove this when bitsandbytes supports.
         """

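The docstring only states the limitation; the guard it implies might look like the following simplified, hypothetical sketch (the parameter names mirror vLLM config fields, but the function itself is ours):

```python
# Hypothetical, simplified guard: 8-bit bitsandbytes kernels cannot yet be
# captured into CUDA graphs, so eager execution is forced for that combination.
def effective_enforce_eager(quantization: str, load_in_8bit: bool,
                            enforce_eager: bool) -> bool:
    if quantization == "bitsandbytes" and load_in_8bit:
        return True  # fall back to eager mode; CUDA graph is unsupported
    return enforce_eager
```
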
@@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
     def __init__(self, quant_config: BitsAndBytesConfig):
         try:
             import bitsandbytes
-            if bitsandbytes.__version__ < "0.45.3":
+            if bitsandbytes.__version__ < "0.46.1":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.3.")
+                                  "install bitsandbytes>=0.46.1.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.3 via "
-                              "`pip install bitsandbytes>=0.45.3` to use "
+            raise ImportError("Please install bitsandbytes>=0.46.1 via "
+                              "`pip install bitsandbytes>=0.46.1` to use "
                               "bitsandbytes quantizer.") from err

         self.quant_config = quant_config

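One caveat worth noting: the checks in this commit compare `__version__` strings lexicographically, which works for these particular values but would order "0.100.0" before "0.46.1". A hedged sketch of a comparison-safe variant (the helper name and constant are ours, not part of the diff):

```python
# Hypothetical helper with the same intent as the checks in this diff, using
# packaging's Version so "0.100.0" correctly compares as newer than "0.46.1".
from packaging.version import Version

MIN_BITSANDBYTES = Version("0.46.1")

def require_bitsandbytes() -> None:
    try:
        import bitsandbytes
    except ImportError as err:
        raise ImportError("Please install bitsandbytes>=0.46.1 via "
                          "`pip install bitsandbytes>=0.46.1` to use "
                          "bitsandbytes quantizer.") from err
    if Version(bitsandbytes.__version__) < MIN_BITSANDBYTES:
        raise ImportError("bitsandbytes version is wrong. Please install "
                          f"bitsandbytes>=0.46.1, found {bitsandbytes.__version__}.")
```
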
@@ -183,12 +183,12 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         try:
             import bitsandbytes

-            if bitsandbytes.__version__ < "0.45.3":
+            if bitsandbytes.__version__ < "0.46.1":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.3.")
+                                  "install bitsandbytes>=0.46.1.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.3 via "
-                              "`pip install bitsandbytes>=0.45.3` to use "
+            raise ImportError("Please install bitsandbytes>=0.46.1 via "
+                              "`pip install bitsandbytes>=0.46.1` to use "
                               "bitsandbytes quantizer.") from err

         hf_weights_files, use_safetensors = self._prepare_weights(