[bugfix] fix the default value of llm_int8_threshold in BitsAndBytesConfig (#10657)

2026-01-05 06:57:27 +08:00 · 2024-11-27 13:55:23 +08:00 · 2024-11-27 13:55:23 +08:00 · cfb3bf25fb
commit cfb3bf25fb
parent 1bf905ddaa
1 changed file with 2 additions and 2 deletions
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -26,7 +26,7 @@ class BitsAndBytesConfig(QuantizationConfig):
        llm_int8_enable_fp32_cpu_offload: bool = False,
        llm_int8_has_fp16_weight: bool = False,
        llm_int8_skip_modules: Optional[List[str]] = None,
-        llm_int8_threshold: float = 0.0,
+        llm_int8_threshold: float = 6.0,
    ) -> None:

        self.load_in_8bit = load_in_8bit
@@ -103,7 +103,7 @@ class BitsAndBytesConfig(QuantizationConfig):
                                               ["llm_int8_skip_modules"],
                                               default_value=[])
        llm_int8_threshold = get_safe_value(config, ["llm_int8_threshold"],
-                                            default_value=0.0)
+                                            default_value=6.0)

        return cls(
            load_in_8bit=load_in_8bit,