[Feature][Quantization] auto_round support for mixed bits quantization (#23812)
Signed-off-by: n1ck-guo <heng.guo@intel.com>
Signed-off-by: Heng Guo <heng.guo@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent f9e7ad5400
commit 87778d5f00
@@ -436,6 +436,12 @@ class AutoRoundConfig(QuantizationConfig):
         return None

     def get_quant_method(self, layer: torch.nn.Module, prefix: str):
+        if prefix and self.extra_config:
+            for layer_name in self.extra_config:
+                if (
+                    layer_name == prefix or layer_name == f"model.{prefix}"
+                ) and self.extra_config[layer_name].get("bits", 16) >= 16:
+                    return UnquantizedLinearMethod()
         if (
             current_platform.is_cpu()
             or current_platform.is_xpu()
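A minimal standalone sketch of the per-layer bit-width check this diff adds. The extra_config contents below are illustrative assumptions about a mixed-bits AutoRound export, not a verified format; only the lookup logic mirrors the diff, with the vLLM-specific return value replaced by a boolean for a self-contained example.

    # Sketch: mixed-bits layer lookup, mirroring the diff's matching logic.
    def is_unquantized(prefix: str, extra_config: dict) -> bool:
        """Return True when extra_config marks this layer as >= 16 bits,
        i.e. the layer should fall back to the unquantized linear method."""
        if not (prefix and extra_config):
            return False
        for layer_name in extra_config:
            # A layer matches either by its exact prefix or with a leading
            # "model." segment, as in the diff above.
            if (
                layer_name == prefix or layer_name == f"model.{prefix}"
            ) and extra_config[layer_name].get("bits", 16) >= 16:
                return True
        return False

    # Hypothetical mixed-bits config: most layers quantized to 4 bits,
    # one layer kept at full 16-bit precision.
    extra_config = {
        "model.layers.0.self_attn.q_proj": {"bits": 4},
        "model.layers.0.mlp.down_proj": {"bits": 16},
    }

    assert not is_unquantized("layers.0.self_attn.q_proj", extra_config)
    assert is_unquantized("layers.0.mlp.down_proj", extra_config)  # matches via "model." prefix

Note the `.get("bits", 16)` default: a layer listed in extra_config without an explicit "bits" entry is treated as 16-bit and therefore left unquantized.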