diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 178fdd63a872f..4e2a31ce67297 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -690,6 +690,9 @@ def load_params_config(model: Union[str, Path], revision: Optional[str], "quant_method": "fp8", "activation_scheme": "static" } + elif quantization.get("quant_method") == "compressed-tensors": + # Pass through the quantization config to compressed-tensors + quantization_config = quantization else: raise ValueError( f"Found unknown quantization='{quantization}' in config") @@ -707,6 +710,7 @@ def load_params_config(model: Union[str, Path], revision: Optional[str], if config_type == "multimodal": multimodal_config = config_dict.pop("vision_encoder") + quantization_config = config_dict.get("quantization_config", {}) config_dict = { "text_config": config_dict, @@ -714,6 +718,8 @@ def load_params_config(model: Union[str, Path], revision: Optional[str], } config_dict["architectures"] = ["PixtralForConditionalGeneration"] config_dict["model_type"] = "pixtral" + if quantization_config: + config_dict["quantization_config"] = quantization_config config_dict.update(kwargs)