mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:45:29 +08:00
[Bugfix] Fix bnb 8bit model weights loading (#19917)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
parent
8ca81bb069
commit
6f170f11dd
@ -577,10 +577,10 @@ def dequantize_dq(quant_states: dict) -> None:
|
|||||||
thereby avoiding this computational overhead during inference. This comes
|
thereby avoiding this computational overhead during inference. This comes
|
||||||
at the cost of increased memory usage.
|
at the cost of increased memory usage.
|
||||||
"""
|
"""
|
||||||
from bitsandbytes.functional import dequantize_blockwise
|
from bitsandbytes.functional import QuantState, dequantize_blockwise
|
||||||
for _, quant_state in quant_states.items():
|
for _, quant_state in quant_states.items():
|
||||||
# Copied from: https://github.com/bitsandbytes-foundation/bitsandbytes/blob/0.45.3/bitsandbytes/functional.py#L1352-L1356
|
# Copied from: https://github.com/bitsandbytes-foundation/bitsandbytes/blob/0.45.3/bitsandbytes/functional.py#L1352-L1356
|
||||||
if quant_state.nested:
|
if isinstance(quant_state, QuantState) and quant_state.nested:
|
||||||
absmax = dequantize_blockwise(quant_state.absmax,
|
absmax = dequantize_blockwise(quant_state.absmax,
|
||||||
quant_state.state2)
|
quant_state.state2)
|
||||||
absmax += quant_state.offset
|
absmax += quant_state.offset
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user