mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-08 05:37:02 +08:00
[Bugfix] Fix transformers model impl ignored for mixtral quant (#18602)
Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
This commit is contained in:
parent
52fb23f47e
commit
6220f3c6b0
@ -225,17 +225,16 @@ def get_model_architecture(
|
||||
"fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
|
||||
]
|
||||
|
||||
if (model_config.quantization is not None
|
||||
and model_config.quantization not in mixtral_supported
|
||||
and "MixtralForCausalLM" in architectures):
|
||||
architectures = ["QuantMixtralForCausalLM"]
|
||||
|
||||
vllm_supported_archs = ModelRegistry.get_supported_archs()
|
||||
vllm_not_supported = not any(arch in vllm_supported_archs
|
||||
for arch in architectures)
|
||||
if (model_config.model_impl == ModelImpl.TRANSFORMERS or
|
||||
model_config.model_impl != ModelImpl.VLLM and vllm_not_supported):
|
||||
architectures = resolve_transformers_arch(model_config, architectures)
|
||||
elif (model_config.quantization is not None
|
||||
and model_config.quantization not in mixtral_supported
|
||||
and "MixtralForCausalLM" in architectures):
|
||||
architectures = ["QuantMixtralForCausalLM"]
|
||||
|
||||
model_cls, arch = ModelRegistry.resolve_model_cls(architectures)
|
||||
if model_config.task == "embed":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user