From 5870d362be004f6ee1d2230c80f2895758701b7c Mon Sep 17 00:00:00 2001 From: Xingyu Liu Date: Tue, 9 Dec 2025 13:56:46 -0800 Subject: [PATCH] support falcon_mamba Signed-off-by: Xingyu Liu --- tests/config/model_arch_groundtruth.json | 57 ++++++++++++------- vllm/config/model.py | 5 -- .../model_arch_config_convertor.py | 1 + 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/tests/config/model_arch_groundtruth.json b/tests/config/model_arch_groundtruth.json index c3540ab5bdf03..3401198ad7d56 100644 --- a/tests/config/model_arch_groundtruth.json +++ b/tests/config/model_arch_groundtruth.json @@ -13,7 +13,7 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.float32" }, "mistralai/Mamba-Codestral-7B-v0.1": { @@ -30,7 +30,7 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11": { @@ -47,9 +47,26 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": true, + "is_multimodal_model": true, "dtype": "torch.float32" }, + "tiiuae/falcon-mamba-7b-instruct": { + "architectures": [ + "FalconMambaForCausalLM" + ], + "model_type": "falcon_mamba", + "text_model_type": "falcon_mamba", + "hidden_size": 4096, + "total_num_hidden_layers": 64, + "total_num_attention_heads": 0, + "head_size": 0, + "vocab_size": 65024, + "total_num_kv_heads": 0, + "num_experts": 0, + "is_deepseek_mla": false, + "is_multimodal_model": false, + "dtype": "torch.bfloat16" + }, "Zyphra/Zamba2-7B-instruct": { "architectures": [ "Zamba2ForCausalLM" @@ -64,7 +81,7 @@ "total_num_kv_heads": 32, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "mosaicml/mpt-7b": { @@ -81,7 +98,7 @@ "total_num_kv_heads": 32, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "databricks/dbrx-instruct": { @@ -98,7 +115,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "tiiuae/falcon-7b": { @@ -115,7 +132,7 @@ "total_num_kv_heads": 1, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "tiiuae/falcon-40b": { @@ -132,7 +149,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "luccafong/deepseek_mtp_main_random": { @@ -149,7 +166,7 @@ "total_num_kv_heads": 32, "num_experts": 72, "is_deepseek_mla": true, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "luccafong/deepseek_mtp_draft_random": { @@ -166,7 +183,7 @@ "total_num_kv_heads": 32, "num_experts": 72, "is_deepseek_mla": true, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "Qwen/Qwen3-Next-80B-A3B-Instruct": { @@ -183,7 +200,7 @@ "total_num_kv_heads": 2, "num_experts": 512, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "tiny-random/qwen3-next-moe": { @@ -200,7 +217,7 @@ "total_num_kv_heads": 8, "num_experts": 32, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "zai-org/GLM-4.5": { @@ -217,7 +234,7 @@ "total_num_kv_heads": 8, "num_experts": 160, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "baidu/ERNIE-4.5-21B-A3B-PT": { @@ -234,7 +251,7 @@ "total_num_kv_heads": 4, "num_experts": 64, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "lmsys/gpt-oss-20b-bf16": { @@ -251,7 +268,7 @@ "total_num_kv_heads": 8, "num_experts": 32, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "deepseek-ai/DeepSeek-V3.2-Exp": { @@ -268,7 +285,7 @@ "total_num_kv_heads": 128, "num_experts": 256, "is_deepseek_mla": true, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "meta-llama/Llama-4-Scout-17B-16E-Instruct": { @@ -285,7 +302,7 @@ "total_num_kv_heads": 8, "num_experts": 16, "is_deepseek_mla": false, - "supports_multimodal": true, + "is_multimodal_model": true, "dtype": "torch.bfloat16" }, "nvidia/Llama-3_3-Nemotron-Super-49B-v1": { @@ -302,7 +319,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "XiaomiMiMo/MiMo-7B-RL": { @@ -319,7 +336,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.bfloat16" }, "meituan-longcat/LongCat-Flash-Chat": { @@ -336,7 +353,7 @@ "total_num_kv_heads": 64, "num_experts": 512, "is_deepseek_mla": true, - "supports_multimodal": false, + "is_multimodal_model": false, "dtype": "torch.float32" } } diff --git a/vllm/config/model.py b/vllm/config/model.py index 84e59dfc83d2e..370f5c9b11935 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -1188,15 +1188,10 @@ class ModelConfig: return self.hf_config.model_type in MM_PREFIX_LM_MODELS def get_head_size(self) -> int: - if self.is_attention_free: - return 0 return self.model_arch_config.head_size def get_total_num_kv_heads(self) -> int: """Returns the total number of KV heads.""" - if self.is_attention_free: - return 0 - return self.model_arch_config.total_num_kv_heads def get_num_kv_heads(self, parallel_config: ParallelConfig) -> int: diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py index d453a2395e66c..40cf438f4a804 100644 --- a/vllm/transformers_utils/model_arch_config_convertor.py +++ b/vllm/transformers_utils/model_arch_config_convertor.py @@ -365,6 +365,7 @@ class LongCatFlashMTPModelArchConfigConvertor(ModelArchConfigConvertorBase): # hf_config.model_type -> convertor class MODEL_ARCH_CONFIG_CONVERTORS = { "mamba": MambaModelArchConfigConvertor, + "falcon_mamba": MambaModelArchConfigConvertor, "timm_wrapper": TerratorchModelArchConfigConvertor, "zamba2": Zamba2ModelArchConfigConvertor, "mpt": MPTModelArchConfigConvertor,