[Doc] Update list of supported models (#8987)

This commit is contained in:
Cyrus Leung 2024-10-02 00:35:39 +08:00 committed by GitHub
parent 35bd215168
commit 4f341bd4bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 8 deletions

View File

@ -20,7 +20,7 @@ Decoder-only Language Models
- Example HuggingFace Models - Example HuggingFace Models
- :ref:`LoRA <lora>` - :ref:`LoRA <lora>`
* - :code:`AquilaForCausalLM` * - :code:`AquilaForCausalLM`
- Aquila & Aquila2 - Aquila, Aquila2
- :code:`BAAI/Aquila-7B`, :code:`BAAI/AquilaChat-7B`, etc. - :code:`BAAI/Aquila-7B`, :code:`BAAI/AquilaChat-7B`, etc.
- ✅︎ - ✅︎
* - :code:`ArcticForCausalLM` * - :code:`ArcticForCausalLM`
@ -28,7 +28,7 @@ Decoder-only Language Models
- :code:`Snowflake/snowflake-arctic-base`, :code:`Snowflake/snowflake-arctic-instruct`, etc. - :code:`Snowflake/snowflake-arctic-base`, :code:`Snowflake/snowflake-arctic-instruct`, etc.
- -
* - :code:`BaiChuanForCausalLM` * - :code:`BaiChuanForCausalLM`
- Baichuan & Baichuan2 - Baichuan2, Baichuan
- :code:`baichuan-inc/Baichuan2-13B-Chat`, :code:`baichuan-inc/Baichuan-7B`, etc. - :code:`baichuan-inc/Baichuan2-13B-Chat`, :code:`baichuan-inc/Baichuan-7B`, etc.
- ✅︎ - ✅︎
* - :code:`BloomForCausalLM` * - :code:`BloomForCausalLM`
@ -51,6 +51,14 @@ Decoder-only Language Models
- DeciLM - DeciLM
- :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc. - :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc.
- -
* - :code:`DeepseekForCausalLM`
- DeepSeek
- :code:`deepseek-ai/deepseek-llm-67b-base`, :code:`deepseek-ai/deepseek-llm-7b-chat`, etc.
-
* - :code:`DeepseekV2ForCausalLM`
- DeepSeek-V2
- :code:`deepseek-ai/DeepSeek-V2`, :code:`deepseek-ai/DeepSeek-V2-Chat`, etc.
-
* - :code:`ExaoneForCausalLM` * - :code:`ExaoneForCausalLM`
- EXAONE-3 - EXAONE-3
- :code:`LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc. - :code:`LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc.
@ -83,6 +91,10 @@ Decoder-only Language Models
- GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM - GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM
- :code:`EleutherAI/gpt-neox-20b`, :code:`EleutherAI/pythia-12b`, :code:`OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, :code:`databricks/dolly-v2-12b`, :code:`stabilityai/stablelm-tuned-alpha-7b`, etc. - :code:`EleutherAI/gpt-neox-20b`, :code:`EleutherAI/pythia-12b`, :code:`OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, :code:`databricks/dolly-v2-12b`, :code:`stabilityai/stablelm-tuned-alpha-7b`, etc.
- -
* - :code:`GraniteForCausalLM`
- Granite, Power-LM
- :code:`ibm/granite-7b-base`, :code:`ibm/PowerLM-3b`, etc.
- ✅︎
* - :code:`InternLMForCausalLM` * - :code:`InternLMForCausalLM`
- InternLM - InternLM
- :code:`internlm/internlm-7b`, :code:`internlm/internlm-chat-7b`, etc. - :code:`internlm/internlm-7b`, :code:`internlm/internlm-chat-7b`, etc.
@ -97,7 +109,7 @@ Decoder-only Language Models
- -
* - :code:`JambaForCausalLM` * - :code:`JambaForCausalLM`
- Jamba - Jamba
- :code:`ai21labs/Jamba-v0.1`, etc. - :code:`ai21labs/AI21-Jamba-1.5-Large`, :code:`ai21labs/AI21-Jamba-1.5-Mini`, :code:`ai21labs/Jamba-v0.1`, etc.
- ✅︎ - ✅︎
* - :code:`LlamaForCausalLM` * - :code:`LlamaForCausalLM`
- Llama 3.1, Llama 3, Llama 2, LLaMA, Yi - Llama 3.1, Llama 3, Llama 2, LLaMA, Yi
@ -177,7 +189,7 @@ Decoder-only Language Models
- -
* - :code:`StableLmForCausalLM` * - :code:`StableLmForCausalLM`
- StableLM - StableLM
- :code:`stabilityai/stablelm-3b-4e1t/` , :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc. - :code:`stabilityai/stablelm-3b-4e1t`, :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc.
- -
* - :code:`Starcoder2ForCausalLM` * - :code:`Starcoder2ForCausalLM`
- Starcoder2 - Starcoder2

View File

@ -12,6 +12,7 @@ logger = init_logger(__name__)
_GENERATION_MODELS = { _GENERATION_MODELS = {
"AquilaModel": ("llama", "LlamaForCausalLM"), "AquilaModel": ("llama", "LlamaForCausalLM"),
"AquilaForCausalLM": ("llama", "LlamaForCausalLM"), # AquilaChat2 "AquilaForCausalLM": ("llama", "LlamaForCausalLM"), # AquilaChat2
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"), # baichuan-7b "BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"), # baichuan-7b
"BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"), # baichuan-13b "BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"), # baichuan-13b
"BloomForCausalLM": ("bloom", "BloomForCausalLM"), "BloomForCausalLM": ("bloom", "BloomForCausalLM"),
@ -30,9 +31,11 @@ _GENERATION_MODELS = {
"GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"), "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
"GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"), "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
"GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"), "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
"GraniteForCausalLM": ("granite", "GraniteForCausalLM"),
"InternLMForCausalLM": ("llama", "LlamaForCausalLM"), "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
"InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"), "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
"JAISLMHeadModel": ("jais", "JAISLMHeadModel"), "JAISLMHeadModel": ("jais", "JAISLMHeadModel"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
"LlamaForCausalLM": ("llama", "LlamaForCausalLM"), "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
# For decapoda-research/llama-* # For decapoda-research/llama-*
"LLaMAForCausalLM": ("llama", "LlamaForCausalLM"), "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
@ -52,6 +55,7 @@ _GENERATION_MODELS = {
"PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"), "PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"),
"PhiForCausalLM": ("phi", "PhiForCausalLM"), "PhiForCausalLM": ("phi", "PhiForCausalLM"),
"Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"), "Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"),
"Phi3SmallForCausalLM": ("phi3_small", "Phi3SmallForCausalLM"),
"PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"), "PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"),
"Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
"Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"), "Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),
@ -62,14 +66,11 @@ _GENERATION_MODELS = {
"StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"), "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
"Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"), "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
"SolarForCausalLM": ("solar", "SolarForCausalLM"), "SolarForCausalLM": ("solar", "SolarForCausalLM"),
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"XverseForCausalLM": ("xverse", "XverseForCausalLM"), "XverseForCausalLM": ("xverse", "XverseForCausalLM"),
"Phi3SmallForCausalLM": ("phi3_small", "Phi3SmallForCausalLM"), # NOTE: The below models are for speculative decoding only
"MedusaModel": ("medusa", "Medusa"), "MedusaModel": ("medusa", "Medusa"),
"EAGLEModel": ("eagle", "EAGLE"), "EAGLEModel": ("eagle", "EAGLE"),
"MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"), "MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
"GraniteForCausalLM": ("granite", "GraniteForCausalLM")
} }
_EMBEDDING_MODELS = { _EMBEDDING_MODELS = {