[Bugfix] Fix transformers model impl ignored for mixtral quant (#18602)

Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
Tristan Leclercq, 2025-05-23 14:54:13 +02:00 (committed by GitHub)
parent 52fb23f47e
commit 6220f3c6b0

@@ -225,17 +225,16 @@ def get_model_architecture(
     mixtral_supported = [
         "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
     ]
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
-
     vllm_supported_archs = ModelRegistry.get_supported_archs()
     vllm_not_supported = not any(arch in vllm_supported_archs
                                  for arch in architectures)
     if (model_config.model_impl == ModelImpl.TRANSFORMERS or
             model_config.model_impl != ModelImpl.VLLM and vllm_not_supported):
         architectures = resolve_transformers_arch(model_config, architectures)
+    elif (model_config.quantization is not None
+          and model_config.quantization not in mixtral_supported
+          and "MixtralForCausalLM" in architectures):
+        architectures = ["QuantMixtralForCausalLM"]
 
     model_cls, arch = ModelRegistry.resolve_model_cls(architectures)
     if model_config.task == "embed":
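
Note: the change is purely one of ordering. The Transformers-backend check now runs before the quantized-Mixtral fallback, so an explicit model_impl=transformers is no longer overridden when a Mixtral checkpoint uses a quantization method outside mixtral_supported. Below is a minimal, self-contained sketch of that precedence; the ModelImpl values, the pick_architectures helper, and the placeholder return value are illustrative assumptions, not vLLM's actual API beyond what the hunk above shows.

from enum import Enum


class ModelImpl(str, Enum):
    # Stand-in for vLLM's ModelImpl enum; values assumed for this sketch.
    AUTO = "auto"
    VLLM = "vllm"
    TRANSFORMERS = "transformers"


# Quantization methods the native Mixtral implementation handles directly,
# copied from the mixtral_supported list in the hunk above.
MIXTRAL_SUPPORTED_QUANT = [
    "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
]


def pick_architectures(architectures, quantization, model_impl,
                       vllm_supported_archs):
    # Hypothetical helper mirroring the reordered checks: the Transformers
    # backend is considered first, and the QuantMixtralForCausalLM fallback
    # applies only when that backend is not selected.
    vllm_not_supported = not any(arch in vllm_supported_archs
                                 for arch in architectures)
    if (model_impl == ModelImpl.TRANSFORMERS
            or (model_impl != ModelImpl.VLLM and vllm_not_supported)):
        # vLLM calls resolve_transformers_arch() at this point; the sketch
        # just returns a placeholder to show which branch was taken.
        return ["<resolved via Transformers backend>"]
    if (quantization is not None
            and quantization not in MIXTRAL_SUPPORTED_QUANT
            and "MixtralForCausalLM" in architectures):
        return ["QuantMixtralForCausalLM"]
    return architectures


# Example: a GPTQ-quantized Mixtral with model_impl=transformers. Previously
# the fallback rewrote the architecture before the Transformers check ran;
# with the reordered checks the explicit backend request takes precedence.
print(pick_architectures(["MixtralForCausalLM"], "gptq",
                         ModelImpl.TRANSFORMERS,
                         {"MixtralForCausalLM"}))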