mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 07:44:55 +08:00
[quantization][config] enable override existing quant_config (#28510)
Signed-off-by: Hank <hcc.mayday@gmail.com> Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent
f2b8e1c551
commit
4d5943bda6
@ -7,6 +7,7 @@ See https://github.com/vllm-project/vllm/issues/11926 for more details.
|
|||||||
Run `pytest tests/quantization/test_register_quantization_config.py`.
|
Run `pytest tests/quantization/test_register_quantization_config.py`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@ -100,17 +101,22 @@ class CustomQuantConfig(QuantizationConfig):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def test_register_quantization_config():
|
def test_register_quantization_config(caplog_vllm):
|
||||||
"""Test register custom quantization config."""
|
"""Test register custom quantization config."""
|
||||||
|
|
||||||
# The quantization method `custom_quant` should be registered.
|
# The quantization method `custom_quant` should be registered.
|
||||||
assert get_quantization_config("custom_quant") == CustomQuantConfig
|
assert get_quantization_config("custom_quant") == CustomQuantConfig
|
||||||
|
|
||||||
# The quantization method `custom_quant` already exists,
|
# The quantization method `custom_quant` already exists,
|
||||||
# should raise an error.
|
# should log a warning when re-registering it.
|
||||||
with pytest.raises(ValueError):
|
with caplog_vllm.at_level(logging.WARNING):
|
||||||
register_quantization_config("custom_quant")(CustomQuantConfig)
|
register_quantization_config("custom_quant")(CustomQuantConfig)
|
||||||
|
|
||||||
|
assert any(
|
||||||
|
"The quantization method 'custom_quant' already exists" in message
|
||||||
|
for message in caplog_vllm.messages
|
||||||
|
), "Expected a warning when re-registering custom_quant"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
argnames="model",
|
argnames="model",
|
||||||
|
|||||||
@ -3,8 +3,11 @@
|
|||||||
|
|
||||||
from typing import Literal, get_args
|
from typing import Literal, get_args
|
||||||
|
|
||||||
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
||||||
|
|
||||||
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
QuantizationMethods = Literal[
|
QuantizationMethods = Literal[
|
||||||
"awq",
|
"awq",
|
||||||
"deepspeedfp",
|
"deepspeedfp",
|
||||||
@ -70,15 +73,20 @@ def register_quantization_config(quantization: str):
|
|||||||
|
|
||||||
def _wrapper(quant_config_cls):
|
def _wrapper(quant_config_cls):
|
||||||
if quantization in QUANTIZATION_METHODS:
|
if quantization in QUANTIZATION_METHODS:
|
||||||
raise ValueError(
|
logger.warning(
|
||||||
f"The quantization method `{quantization}` is already exists."
|
"The quantization method '%s' already exists and will be "
|
||||||
|
"overwritten by the quantization config %s.",
|
||||||
|
quantization,
|
||||||
|
quant_config_cls,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
QUANTIZATION_METHODS.append(quantization)
|
||||||
|
|
||||||
if not issubclass(quant_config_cls, QuantizationConfig):
|
if not issubclass(quant_config_cls, QuantizationConfig):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The quantization config must be a subclass of `QuantizationConfig`."
|
"The quantization config must be a subclass of `QuantizationConfig`."
|
||||||
)
|
)
|
||||||
_CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = quant_config_cls
|
_CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = quant_config_cls
|
||||||
QUANTIZATION_METHODS.append(quantization)
|
|
||||||
return quant_config_cls
|
return quant_config_cls
|
||||||
|
|
||||||
return _wrapper
|
return _wrapper
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user