mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 04:25:01 +08:00
Enable 4bit bnb prequant MOE (#21548)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
1891a265d3
commit
9b94d6ec8f
@@ -427,14 +427,10 @@ class BitsAndBytesModelLoader(BaseModelLoader):
|
|||||||
elif isinstance(module, FusedMoE) and hasattr(
|
elif isinstance(module, FusedMoE) and hasattr(
|
||||||
module.quant_method, "quant_config"):
|
module.quant_method, "quant_config"):
|
||||||
# TODO: support FusedMoE with prequant and 8bit.
|
# TODO: support FusedMoE with prequant and 8bit.
|
||||||
if self.pre_quant:
|
if self.pre_quant and self.load_8bit:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Prequant BitsAndBytes models with FusedMoE is not "
|
"Prequant BitsAndBytes 8bit models with FusedMoE "
|
||||||
"supported yet.")
|
"is not supported yet.")
|
||||||
if self.load_8bit:
|
|
||||||
raise ValueError(
|
|
||||||
"BitsAndBytes 8bit quantization with FusedMoE is not "
|
|
||||||
"supported yet.")
|
|
||||||
# Get the corresponding weight name using module name and
|
# Get the corresponding weight name using module name and
|
||||||
# expert_params_mapping.
|
# expert_params_mapping.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user