From 6f170f11dddcfafa061785d4fb4993f7bcb16107 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 21 Jun 2025 11:29:09 +0800
Subject: [PATCH] [Bugfix] Fix bnb 8bit model weights loading (#19917)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 vllm/model_executor/model_loader/bitsandbytes_loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py
index a0a5372600f3..09857ef297f0 100644
--- a/vllm/model_executor/model_loader/bitsandbytes_loader.py
+++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py
@@ -577,10 +577,10 @@ def dequantize_dq(quant_states: dict) -> None:
     thereby avoiding this computational overhead during inference.
     This comes at the cost of increased memory usage.
     """
-    from bitsandbytes.functional import dequantize_blockwise
+    from bitsandbytes.functional import QuantState, dequantize_blockwise
     for _, quant_state in quant_states.items():
         # Copied from: https://github.com/bitsandbytes-foundation/bitsandbytes/blob/0.45.3/bitsandbytes/functional.py#L1352-#L1356
-        if quant_state.nested:
+        if isinstance(quant_state, QuantState) and quant_state.nested:
             absmax = dequantize_blockwise(quant_state.absmax,
                                           quant_state.state2)
             absmax += quant_state.offset
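
For context on the fix: before this patch, dequantize_dq assumed every value
in quant_states was a bitsandbytes QuantState and unconditionally read
quant_state.nested. With 8-bit bnb checkpoints the stored per-weight state is
apparently not a QuantState (possibly a plain scale tensor), so that attribute
access broke weight loading. Below is a minimal, self-contained sketch of the
failure mode and the guard; FakeQuantState and the example dict entries are
hypothetical stand-ins, not vLLM's or bitsandbytes' actual objects:

    import torch

    class FakeQuantState:
        # Stand-in for bitsandbytes.functional.QuantState (the 4-bit path);
        # the real QuantState carries absmax/state2/offset for double quant.
        nested = False

    quant_states = {
        "layer.0.weight": FakeQuantState(),  # 4-bit style state object
        "layer.1.weight": torch.ones(8),     # 8-bit style entry: no .nested
    }

    for name, quant_state in quant_states.items():
        # An unguarded `quant_state.nested` would raise AttributeError on the
        # tensor entry; the isinstance check skips non-QuantState values, which
        # is the same shape of guard the patch adds.
        if isinstance(quant_state, FakeQuantState) and quant_state.nested:
            print(f"{name}: dequantize nested absmax here")
        else:
            print(f"{name}: no nested double-quant state, nothing to do")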