[quantization][config] enable override existing quant_config (#28510)

Signed-off-by: Hank <hcc.mayday@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
2026-03-16 16:07:15 +08:00 · 2025-11-14 09:24:10 +08:00 · 2025-11-14 09:24:10 +08:00 · 4d5943bda6
commit 4d5943bda6
parent f2b8e1c551
2 changed files with 20 additions and 6 deletions
--- a/tests/quantization/test_register_quantization_config.py
+++ b/tests/quantization/test_register_quantization_config.py
@@ -7,6 +7,7 @@ See https://github.com/vllm-project/vllm/issues/11926 for more details.
 Run `pytest tests/quantization/test_register_quantization_config.py`.
 """

+import logging
 from typing import Any

 import pytest
@@ -100,17 +101,22 @@ class CustomQuantConfig(QuantizationConfig):
        return None


-def test_register_quantization_config():
+def test_register_quantization_config(caplog_vllm):
    """Test register custom quantization config."""

    # The quantization method `custom_quant` should be registered.
    assert get_quantization_config("custom_quant") == CustomQuantConfig

    # The quantization method `custom_quant` is already exists,
-    # should raise an error.
-    with pytest.raises(ValueError):
+    # should raise a warning when re-registering it.
+    with caplog_vllm.at_level(logging.WARNING):
        register_quantization_config("custom_quant")(CustomQuantConfig)

+    assert any(
+        "The quantization method 'custom_quant' already exists" in message
+        for message in caplog_vllm.messages
+    ), "Expected a warning when re-registering custom_quant"
+

@pytest.mark.parametrize(
    argnames="model",
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -3,8 +3,11 @@

 from typing import Literal, get_args

+from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.base_config import QuantizationConfig

+logger = init_logger(__name__)
+
 QuantizationMethods = Literal[
    "awq",
    "deepspeedfp",
@@ -70,15 +73,20 @@ def register_quantization_config(quantization: str):

    def _wrapper(quant_config_cls):
        if quantization in QUANTIZATION_METHODS:
-            raise ValueError(
-                f"The quantization method `{quantization}` is already exists."
+            logger.warning(
+                "The quantization method '%s' already exists and will be "
+                "overwritten by the quantization config %s.",
+                quantization,
+                quant_config_cls,
            )
+        else:
+            QUANTIZATION_METHODS.append(quantization)
+
        if not issubclass(quant_config_cls, QuantizationConfig):
            raise ValueError(
                "The quantization config must be a subclass of `QuantizationConfig`."
            )
        _CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = quant_config_cls
-        QUANTIZATION_METHODS.append(quantization)
        return quant_config_cls

    return _wrapper