From aa72d9a4ea6b31a845bf4fbd5a97d3175a8c329a Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Wed, 23 Apr 2025 06:46:23 -0600
Subject: [PATCH] Mistral-format support for compressed-tensors (#16803)

Signed-off-by: mgoin
---
 vllm/transformers_utils/config.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 178fdd63a872f..4e2a31ce67297 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -690,6 +690,9 @@ def load_params_config(model: Union[str, Path], revision: Optional[str],
                 "quant_method": "fp8",
                 "activation_scheme": "static"
             }
+        elif quantization.get("quant_method") == "compressed-tensors":
+            # Pass through the quantization config to compressed-tensors
+            quantization_config = quantization
         else:
             raise ValueError(
                 f"Found unknown quantization='{quantization}' in config")
@@ -707,6 +710,7 @@ def load_params_config(model: Union[str, Path], revision: Optional[str],
 
     if config_type == "multimodal":
         multimodal_config = config_dict.pop("vision_encoder")
+        quantization_config = config_dict.get("quantization_config", {})
 
         config_dict = {
             "text_config": config_dict,
@@ -714,6 +718,8 @@ def load_params_config(model: Union[str, Path], revision: Optional[str],
         }
         config_dict["architectures"] = ["PixtralForConditionalGeneration"]
         config_dict["model_type"] = "pixtral"
+        if quantization_config:
+            config_dict["quantization_config"] = quantization_config
 
     config_dict.update(kwargs)
 