mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 03:24:56 +08:00
[quantization][config] enable override existing quant_config (#28510)
Signed-off-by: Hank <hcc.mayday@gmail.com> Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent
f2b8e1c551
commit
4d5943bda6
@ -7,6 +7,7 @@ See https://github.com/vllm-project/vllm/issues/11926 for more details.
|
||||
Run `pytest tests/quantization/test_register_quantization_config.py`.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
@ -100,17 +101,22 @@ class CustomQuantConfig(QuantizationConfig):
|
||||
return None
|
||||
|
||||
|
||||
def test_register_quantization_config():
|
||||
def test_register_quantization_config(caplog_vllm):
|
||||
"""Test register custom quantization config."""
|
||||
|
||||
# The quantization method `custom_quant` should be registered.
|
||||
assert get_quantization_config("custom_quant") == CustomQuantConfig
|
||||
|
||||
# The quantization method `custom_quant` already exists,
|
||||
# should raise an error.
|
||||
with pytest.raises(ValueError):
|
||||
# should raise a warning when re-registering it.
|
||||
with caplog_vllm.at_level(logging.WARNING):
|
||||
register_quantization_config("custom_quant")(CustomQuantConfig)
|
||||
|
||||
assert any(
|
||||
"The quantization method 'custom_quant' already exists" in message
|
||||
for message in caplog_vllm.messages
|
||||
), "Expected a warning when re-registering custom_quant"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
argnames="model",
|
||||
|
||||
@ -3,8 +3,11 @@
|
||||
|
||||
from typing import Literal, get_args
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
QuantizationMethods = Literal[
|
||||
"awq",
|
||||
"deepspeedfp",
|
||||
@ -70,15 +73,20 @@ def register_quantization_config(quantization: str):
|
||||
|
||||
def _wrapper(quant_config_cls):
|
||||
if quantization in QUANTIZATION_METHODS:
|
||||
raise ValueError(
|
||||
f"The quantization method `{quantization}` is already exists."
|
||||
logger.warning(
|
||||
"The quantization method '%s' already exists and will be "
|
||||
"overwritten by the quantization config %s.",
|
||||
quantization,
|
||||
quant_config_cls,
|
||||
)
|
||||
else:
|
||||
QUANTIZATION_METHODS.append(quantization)
|
||||
|
||||
if not issubclass(quant_config_cls, QuantizationConfig):
|
||||
raise ValueError(
|
||||
"The quantization config must be a subclass of `QuantizationConfig`."
|
||||
)
|
||||
_CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = quant_config_cls
|
||||
QUANTIZATION_METHODS.append(quantization)
|
||||
return quant_config_cls
|
||||
|
||||
return _wrapper
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user