From acc1a6e10af7ec16787ffe8dc67ad36a05955024 Mon Sep 17 00:00:00 2001 From: Jun-Howie <62869005+Jun-Howie@users.noreply.github.com> Date: Mon, 1 Sep 2025 13:39:57 +0800 Subject: [PATCH] Fix the bug related to loading GPTQ INT3 weights. (#23328) Signed-off-by: JunHowie Co-authored-by: JunHowie Co-authored-by: Isotr0py --- vllm/model_executor/layers/quantization/utils/gptq_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/utils/gptq_utils.py b/vllm/model_executor/layers/quantization/utils/gptq_utils.py index db82b0def1653..4fbd0f5c4efff 100644 --- a/vllm/model_executor/layers/quantization/utils/gptq_utils.py +++ b/vllm/model_executor/layers/quantization/utils/gptq_utils.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from copy import deepcopy +from fractions import Fraction from typing import Optional, Union import regex as re @@ -29,7 +30,7 @@ def override_config(config: QuantizationConfig, prefix: str): if isinstance(desc_act, bool): config.desc_act = desc_act - config.pack_factor = 32 // config.weight_bits # packed into int32 + config.pack_factor = Fraction(32, config.weight_bits) # packed into int32 if config.get_name() == "gptq_marlin": is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym) if isinstance(is_sym, bool):