From 1cf506d89ef9e4d1fa7c806aebaa2f9ed18e3e5a Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Mon, 8 Dec 2025 16:13:31 -0800
Subject: [PATCH] fix attention free models

Signed-off-by: Xingyu Liu
---
 .../model_arch_config_convertor.py            | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py
index db5899eab5c12..999f46083c6e0 100644
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -269,6 +269,22 @@ class ModelArchConfigConvertorBase:
         return model_arch_config
 
 
+class MambaModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def get_head_size(self) -> int:
+        return 0
+
+    def get_total_num_kv_heads(self) -> int:
+        return 0
+
+
+class TerratorchModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def get_head_size(self) -> int:
+        return 0
+
+    def get_total_num_kv_heads(self) -> int:
+        return 0
+
+
 class Zamba2ModelArchConfigConvertor(ModelArchConfigConvertorBase):
     def get_head_size(self) -> int:
         return getattr(self.hf_text_config, "attention_head_dim", 0)
@@ -357,6 +373,9 @@ class LongCatFlashMTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
 
 # hf_config.model_type -> convertor class
 MODEL_ARCH_CONFIG_CONVERTORS = {
+    "mamba": MambaModelArchConfigConvertor,
+    "mamba2": MambaModelArchConfigConvertor,
+    "terratorch": TerratorchModelArchConfigConvertor,
     "zamba2": Zamba2ModelArchConfigConvertor,
     "mpt": MPTModelArchConfigConvertor,
     "dbrx": DbrxModelArchConfigConvertor,
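
Below is a minimal, self-contained sketch of the pattern this patch extends: attention-free architectures (mamba, mamba2, terratorch) register a convertor whose head size and total KV-head count are zero, presumably so downstream code that sizes attention state treats them as having no KV cache. Only the convertor class names and the MODEL_ARCH_CONFIG_CONVERTORS registry come from the patch; the stub base class and the resolve_convertor helper are illustrative assumptions, not vLLM code.

    from types import SimpleNamespace

    # Stub of the base convertor: just the two methods the patch overrides.
    # The real ModelArchConfigConvertorBase in vLLM is richer; this stub is
    # an assumption made for the sketch.
    class ModelArchConfigConvertorBase:
        def __init__(self, hf_text_config):
            self.hf_text_config = hf_text_config

        def get_head_size(self) -> int:
            # Typical attention model: hidden size split across attention heads.
            return (self.hf_text_config.hidden_size
                    // self.hf_text_config.num_attention_heads)

        def get_total_num_kv_heads(self) -> int:
            return self.hf_text_config.num_key_value_heads


    class MambaModelArchConfigConvertor(ModelArchConfigConvertorBase):
        # Attention-free architecture: no attention heads, so both are zero.
        def get_head_size(self) -> int:
            return 0

        def get_total_num_kv_heads(self) -> int:
            return 0


    # hf_config.model_type -> convertor class (mirrors the registry in the patch)
    MODEL_ARCH_CONFIG_CONVERTORS = {
        "mamba": MambaModelArchConfigConvertor,
        "mamba2": MambaModelArchConfigConvertor,
    }


    def resolve_convertor(model_type, hf_text_config):
        # Hypothetical dispatch helper; vLLM's actual lookup code may differ.
        cls = MODEL_ARCH_CONFIG_CONVERTORS.get(model_type,
                                               ModelArchConfigConvertorBase)
        return cls(hf_text_config)


    if __name__ == "__main__":
        llama_cfg = SimpleNamespace(hidden_size=4096, num_attention_heads=32,
                                    num_key_value_heads=8)
        assert resolve_convertor("llama", llama_cfg).get_head_size() == 128

        mamba = resolve_convertor("mamba2", SimpleNamespace())
        assert mamba.get_head_size() == 0
        assert mamba.get_total_num_kv_heads() == 0
        print("attention-free models report zero head size and zero KV heads")

Returning 0 rather than raising lets generic call sites compute quantities like per-token KV-cache size as a product of head size and KV-head count, which naturally comes out to zero for these models without any special-casing.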