[Doc] Update list of supported models (#8987)

This commit is contained in:
Cyrus Leung 2024-10-02 00:35:39 +08:00 committed by GitHub
parent 35bd215168
commit 4f341bd4bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 8 deletions

View File

@ -20,7 +20,7 @@ Decoder-only Language Models
- Example HuggingFace Models - Example HuggingFace Models
- :ref:`LoRA <lora>` - :ref:`LoRA <lora>`
* - :code:`AquilaForCausalLM` * - :code:`AquilaForCausalLM`
- Aquila & Aquila2 - Aquila, Aquila2
- :code:`BAAI/Aquila-7B`, :code:`BAAI/AquilaChat-7B`, etc. - :code:`BAAI/Aquila-7B`, :code:`BAAI/AquilaChat-7B`, etc.
- ✅︎ - ✅︎
* - :code:`ArcticForCausalLM` * - :code:`ArcticForCausalLM`
@ -28,7 +28,7 @@ Decoder-only Language Models
- :code:`Snowflake/snowflake-arctic-base`, :code:`Snowflake/snowflake-arctic-instruct`, etc. - :code:`Snowflake/snowflake-arctic-base`, :code:`Snowflake/snowflake-arctic-instruct`, etc.
- -
* - :code:`BaiChuanForCausalLM` * - :code:`BaiChuanForCausalLM`
- Baichuan & Baichuan2 - Baichuan2, Baichuan
- :code:`baichuan-inc/Baichuan2-13B-Chat`, :code:`baichuan-inc/Baichuan-7B`, etc. - :code:`baichuan-inc/Baichuan2-13B-Chat`, :code:`baichuan-inc/Baichuan-7B`, etc.
- ✅︎ - ✅︎
* - :code:`BloomForCausalLM` * - :code:`BloomForCausalLM`
@ -51,6 +51,14 @@ Decoder-only Language Models
- DeciLM - DeciLM
- :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc. - :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc.
- -
* - :code:`DeepseekForCausalLM`
- DeepSeek
- :code:`deepseek-ai/deepseek-llm-67b-base`, :code:`deepseek-ai/deepseek-llm-7b-chat`, etc.
-
* - :code:`DeepseekV2ForCausalLM`
- DeepSeek-V2
- :code:`deepseek-ai/DeepSeek-V2`, :code:`deepseek-ai/DeepSeek-V2-Chat`, etc.
-
* - :code:`ExaoneForCausalLM` * - :code:`ExaoneForCausalLM`
- EXAONE-3 - EXAONE-3
- :code:`LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc. - :code:`LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc.
@ -83,6 +91,10 @@ Decoder-only Language Models
- GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM - GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM
- :code:`EleutherAI/gpt-neox-20b`, :code:`EleutherAI/pythia-12b`, :code:`OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, :code:`databricks/dolly-v2-12b`, :code:`stabilityai/stablelm-tuned-alpha-7b`, etc. - :code:`EleutherAI/gpt-neox-20b`, :code:`EleutherAI/pythia-12b`, :code:`OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, :code:`databricks/dolly-v2-12b`, :code:`stabilityai/stablelm-tuned-alpha-7b`, etc.
- -
* - :code:`GraniteForCausalLM`
- Granite, Power-LM
- :code:`ibm/granite-7b-base`, :code:`ibm/PowerLM-3b`, etc.
- ✅︎
* - :code:`InternLMForCausalLM` * - :code:`InternLMForCausalLM`
- InternLM - InternLM
- :code:`internlm/internlm-7b`, :code:`internlm/internlm-chat-7b`, etc. - :code:`internlm/internlm-7b`, :code:`internlm/internlm-chat-7b`, etc.
@ -97,7 +109,7 @@ Decoder-only Language Models
- -
* - :code:`JambaForCausalLM` * - :code:`JambaForCausalLM`
- Jamba - Jamba
- :code:`ai21labs/Jamba-v0.1`, etc. - :code:`ai21labs/AI21-Jamba-1.5-Large`, :code:`ai21labs/AI21-Jamba-1.5-Mini`, :code:`ai21labs/Jamba-v0.1`, etc.
- ✅︎ - ✅︎
* - :code:`LlamaForCausalLM` * - :code:`LlamaForCausalLM`
- Llama 3.1, Llama 3, Llama 2, LLaMA, Yi - Llama 3.1, Llama 3, Llama 2, LLaMA, Yi
@ -177,7 +189,7 @@ Decoder-only Language Models
- -
* - :code:`StableLmForCausalLM` * - :code:`StableLmForCausalLM`
- StableLM - StableLM
- :code:`stabilityai/stablelm-3b-4e1t/` , :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc. - :code:`stabilityai/stablelm-3b-4e1t`, :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc.
- -
* - :code:`Starcoder2ForCausalLM` * - :code:`Starcoder2ForCausalLM`
- Starcoder2 - Starcoder2

View File

@ -12,6 +12,7 @@ logger = init_logger(__name__)
_GENERATION_MODELS = { _GENERATION_MODELS = {
"AquilaModel": ("llama", "LlamaForCausalLM"), "AquilaModel": ("llama", "LlamaForCausalLM"),
"AquilaForCausalLM": ("llama", "LlamaForCausalLM"), # AquilaChat2 "AquilaForCausalLM": ("llama", "LlamaForCausalLM"), # AquilaChat2
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"), # baichuan-7b "BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"), # baichuan-7b
"BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"), # baichuan-13b "BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"), # baichuan-13b
"BloomForCausalLM": ("bloom", "BloomForCausalLM"), "BloomForCausalLM": ("bloom", "BloomForCausalLM"),
@ -30,9 +31,11 @@ _GENERATION_MODELS = {
"GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"), "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
"GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"), "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
"GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"), "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
"GraniteForCausalLM": ("granite", "GraniteForCausalLM"),
"InternLMForCausalLM": ("llama", "LlamaForCausalLM"), "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
"InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"), "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
"JAISLMHeadModel": ("jais", "JAISLMHeadModel"), "JAISLMHeadModel": ("jais", "JAISLMHeadModel"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
"LlamaForCausalLM": ("llama", "LlamaForCausalLM"), "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
# For decapoda-research/llama-* # For decapoda-research/llama-*
"LLaMAForCausalLM": ("llama", "LlamaForCausalLM"), "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
@ -52,6 +55,7 @@ _GENERATION_MODELS = {
"PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"), "PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"),
"PhiForCausalLM": ("phi", "PhiForCausalLM"), "PhiForCausalLM": ("phi", "PhiForCausalLM"),
"Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"), "Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"),
"Phi3SmallForCausalLM": ("phi3_small", "Phi3SmallForCausalLM"),
"PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"), "PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"),
"Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
"Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"), "Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),
@ -62,14 +66,11 @@ _GENERATION_MODELS = {
"StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"), "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
"Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"), "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
"SolarForCausalLM": ("solar", "SolarForCausalLM"), "SolarForCausalLM": ("solar", "SolarForCausalLM"),
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"XverseForCausalLM": ("xverse", "XverseForCausalLM"), "XverseForCausalLM": ("xverse", "XverseForCausalLM"),
"Phi3SmallForCausalLM": ("phi3_small", "Phi3SmallForCausalLM"), # NOTE: The below models are for speculative decoding only
"MedusaModel": ("medusa", "Medusa"), "MedusaModel": ("medusa", "Medusa"),
"EAGLEModel": ("eagle", "EAGLE"), "EAGLEModel": ("eagle", "EAGLE"),
"MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"), "MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
"GraniteForCausalLM": ("granite", "GraniteForCausalLM")
} }
_EMBEDDING_MODELS = { _EMBEDDING_MODELS = {