diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index c058c20f1ed7..cd1228836b87 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -328,7 +328,7 @@ th { | `BambaForCausalLM` | Bamba | `ibm-ai-platform/Bamba-9B-fp8`, `ibm-ai-platform/Bamba-9B` | ✅︎ | ✅︎ | ✅︎ | | `BloomForCausalLM` | BLOOM, BLOOMZ, BLOOMChat | `bigscience/bloom`, `bigscience/bloomz`, etc. | | ✅︎ | | | `BartForConditionalGeneration` | BART | `facebook/bart-base`, `facebook/bart-large-cnn`, etc. | | | | -| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `THUDM/chatglm2-6b`, `THUDM/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `zai-org/chatglm2-6b`, `zai-org/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ | ✅︎ | | `CohereForCausalLM`, `Cohere2ForCausalLM` | Command-R | `CohereForAI/c4ai-command-r-v01`, `CohereForAI/c4ai-command-r7b-12-2024`, etc. | ✅︎ | ✅︎ | ✅︎ | | `DbrxForCausalLM` | DBRX | `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc. | | ✅︎ | ✅︎ | | `DeciLMForCausalLM` | DeciLM | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc. | ✅︎ | ✅︎ | ✅︎ | @@ -348,8 +348,8 @@ th { | `Gemma2ForCausalLM` | Gemma 2 | `google/gemma-2-9b`, `google/gemma-2-27b`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Gemma3ForCausalLM` | Gemma 3 | `google/gemma-3-1b-it`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Gemma3nForConditionalGeneration` | Gemma 3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | | ✅︎ | -| `GlmForCausalLM` | GLM-4 | `THUDM/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ | ✅︎ | -| `Glm4ForCausalLM` | GLM-4-0414 | `THUDM/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `GlmForCausalLM` | GLM-4 | `zai-org/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `Glm4ForCausalLM` | GLM-4-0414 | `zai-org/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ | ✅︎ | | `GPT2LMHeadModel` | GPT-2 | `gpt2`, `gpt2-xl`, etc. | | ✅︎ | ✅︎ | | `GPTBigCodeForCausalLM` | StarCoder, SantaCoder, WizardCoder | `bigcode/starcoder`, `bigcode/gpt_bigcode-santacoder`, `WizardLM/WizardCoder-15B-V1.0`, etc. | ✅︎ | ✅︎ | ✅︎ | | `GPTJForCausalLM` | GPT-J | `EleutherAI/gpt-j-6b`, `nomic-ai/gpt4all-j`, etc. | | ✅︎ | ✅︎ | @@ -589,8 +589,8 @@ See [this page](generative_models.md) for more information on how to use generat | `Florence2ForConditionalGeneration` | Florence-2 | T + I | `microsoft/Florence-2-base`, `microsoft/Florence-2-large`, etc. | | | | | `FuyuForCausalLM` | Fuyu | T + I | `adept/fuyu-8b`, etc. | | ✅︎ | ✅︎ | | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I+ | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ | -| `GLM4VForCausalLM`^ | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ | -| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + IE+ + VE+ | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `GLM4VForCausalLM`^ | GLM-4V | T + I | `zai-org/glm-4v-9b`, `zai-org/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + IE+ + VE+ | `zai-org/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Glm4MoeForCausalLM` | GLM-4.5 | T + IE+ + VE+ | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + IE+ + VE+ | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ | | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ | diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py index a75b8e2b047d..16bb3712f551 100644 --- a/examples/offline_inference/vision_language.py +++ b/examples/offline_inference/vision_language.py @@ -221,7 +221,7 @@ def run_gemma3(questions: list[str], modality: str) -> ModelRequestData: # GLM-4v def run_glm4v(questions: list[str], modality: str) -> ModelRequestData: assert modality == "image" - model_name = "THUDM/glm-4v-9b" + model_name = "zai-org/glm-4v-9b" engine_args = EngineArgs( model=model_name, @@ -250,7 +250,7 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData: # GLM-4.1V def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData: - model_name = "THUDM/GLM-4.1V-9B-Thinking" + model_name = "zai-org/GLM-4.1V-9B-Thinking" engine_args = EngineArgs( model=model_name, diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index cfb2e2dd15f4..12dd7c422263 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -154,7 +154,7 @@ TEXT_GENERATION_MODELS = { "baichuan-inc/Baichuan-7B": PPTestSettings.fast(), "baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(), "bigscience/bloomz-1b1": PPTestSettings.fast(), - "THUDM/chatglm3-6b": PPTestSettings.fast(), + "zai-org/chatglm3-6b": PPTestSettings.fast(), "CohereForAI/c4ai-command-r-v01": PPTestSettings.fast(load_format="dummy"), "databricks/dbrx-instruct": PPTestSettings.fast(load_format="dummy"), "Deci/DeciLM-7B-instruct": PPTestSettings.fast(), @@ -224,7 +224,7 @@ MULTIMODAL_MODELS = { "Salesforce/blip2-opt-6.7b": PPTestSettings.fast(), "facebook/chameleon-7b": PPTestSettings.fast(), "adept/fuyu-8b": PPTestSettings.fast(), - "THUDM/glm-4v-9b": PPTestSettings.fast(), + "zai-org/glm-4v-9b": PPTestSettings.fast(), "OpenGVLab/InternVL2-1B": PPTestSettings.fast(), "llava-hf/llava-1.5-7b-hf": PPTestSettings.fast(), "llava-hf/llava-v1.6-mistral-7b-hf": PPTestSettings.fast(), diff --git a/tests/lora/test_add_lora.py b/tests/lora/test_add_lora.py index cc8160b2860d..d7b019509fa3 100644 --- a/tests/lora/test_add_lora.py +++ b/tests/lora/test_add_lora.py @@ -14,7 +14,7 @@ from vllm.lora.request import LoRARequest from vllm.sampling_params import SamplingParams from vllm.utils import merge_async_iterators -MODEL_PATH = "THUDM/chatglm3-6b" +MODEL_PATH = "zai-org/chatglm3-6b" LORA_RANK = 64 DEFAULT_MAX_LORAS = 4 * 3 diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py index 5481b413b8f5..fb00e7b65b04 100644 --- a/tests/lora/test_chatglm3_tp.py +++ b/tests/lora/test_chatglm3_tp.py @@ -6,7 +6,7 @@ from vllm.lora.request import LoRARequest from ..utils import create_new_process_for_each_test, multi_gpu_test -MODEL_PATH = "THUDM/chatglm3-6b" +MODEL_PATH = "zai-org/chatglm3-6b" PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\n"\n##Instruction:\nconcert_singer contains tables such as stadium, singer, concert, singer_in_concert. Table stadium has columns such as Stadium_ID, Location, Name, Capacity, Highest, Lowest, Average. Stadium_ID is the primary key.\nTable singer has columns such as Singer_ID, Name, Country, Song_Name, Song_release_year, Age, Is_male. Singer_ID is the primary key.\nTable concert has columns such as concert_ID, concert_Name, Theme, Stadium_ID, Year. concert_ID is the primary key.\nTable singer_in_concert has columns such as concert_ID, Singer_ID. concert_ID is the primary key.\nThe Stadium_ID of concert is the foreign key of Stadium_ID of stadium.\nThe Singer_ID of singer_in_concert is the foreign key of Singer_ID of singer.\nThe concert_ID of singer_in_concert is the foreign key of concert_ID of concert.\n\n###Input:\n{query}\n\n###Response:""" # noqa: E501 diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py index ea240d227889..57382914bfea 100644 --- a/tests/models/language/generation/test_common.py +++ b/tests/models/language/generation/test_common.py @@ -53,7 +53,7 @@ AITER_MODEL_LIST = [ marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), pytest.param( - "THUDM/chatglm3-6b", # chatglm (text-only) + "zai-org/chatglm3-6b", # chatglm (text-only) ), pytest.param( "meta-llama/Llama-3.2-1B-Instruct", # llama diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 967228b54a0a..8cb826c1144d 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -355,7 +355,7 @@ VLM_TEST_SETTINGS = { num_logprobs=10, ), "glm4v": VLMTestInfo( - models=["THUDM/glm-4v-9b"], + models=["zai-org/glm-4v-9b"], test_type=VLMTestType.IMAGE, prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts({ @@ -374,7 +374,7 @@ VLM_TEST_SETTINGS = { marks=[large_gpu_mark(min_gb=32)], ), "glm4_1v": VLMTestInfo( - models=["THUDM/GLM-4.1V-9B-Thinking"], + models=["zai-org/GLM-4.1V-9B-Thinking"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501 @@ -388,7 +388,7 @@ VLM_TEST_SETTINGS = { marks=[large_gpu_mark(min_gb=32)], ), "glm4_1v-video": VLMTestInfo( - models=["THUDM/GLM-4.1V-9B-Thinking"], + models=["zai-org/GLM-4.1V-9B-Thinking"], # GLM4.1V require include video metadata for input test_type=VLMTestType.CUSTOM_INPUTS, max_model_len=4096, diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index f70e03d0f669..bd1c55d95dac 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -271,8 +271,8 @@ def _test_processing_correctness_one( "microsoft/Florence-2-base", "adept/fuyu-8b", "google/gemma-3-4b-it", - "THUDM/glm-4v-9b", - "THUDM/GLM-4.1V-9B-Thinking", + "zai-org/glm-4v-9b", + "zai-org/GLM-4.1V-9B-Thinking", "ibm-granite/granite-speech-3.3-2b", "h2oai/h2ovl-mississippi-800m", "internlm/Intern-S1", diff --git a/tests/models/multimodal/processing/test_glm4_1v.py b/tests/models/multimodal/processing/test_glm4_1v.py index d1c5fa8fec6d..a6d900ec5d89 100644 --- a/tests/models/multimodal/processing/test_glm4_1v.py +++ b/tests/models/multimodal/processing/test_glm4_1v.py @@ -9,7 +9,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY from ...utils import build_model_context -@pytest.mark.parametrize("model_id", ["THUDM/GLM-4.1V-9B-Thinking"]) +@pytest.mark.parametrize("model_id", ["zai-org/GLM-4.1V-9B-Thinking"]) @pytest.mark.parametrize("expected_toks_per_frame", [299]) @pytest.mark.parametrize("num_frames", [32, 128]) @pytest.mark.parametrize("fps, expected_grid_t", [(1, 5), (2, 10)]) diff --git a/tests/models/registry.py b/tests/models/registry.py index 25cfa267d181..ffa6b755adf4 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -153,7 +153,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}), # noqa: E501 "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m", {"1b": "bigscience/bloomz-1b1"}), - "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b", + "ChatGLMModel": _HfExamplesInfo("zai-org/chatglm3-6b", trust_remote_code=True, max_transformers_version="4.48"), "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3", # noqa: E501 @@ -187,8 +187,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"), "Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it", # noqa: E501 min_transformers_version="4.53"), - "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"), - "Glm4ForCausalLM": _HfExamplesInfo("THUDM/GLM-4-9B-0414"), + "GlmForCausalLM": _HfExamplesInfo("zai-org/glm-4-9b-chat-hf"), + "Glm4ForCausalLM": _HfExamplesInfo("zai-org/GLM-4-9B-0414"), "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5", min_transformers_version="4.54"), # noqa: E501 "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", @@ -380,10 +380,10 @@ _MULTIMODAL_EXAMPLE_MODELS = { "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"), "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"), "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-2b"), # noqa: E501 - "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b", + "GLM4VForCausalLM": _HfExamplesInfo("zai-org/glm-4v-9b", trust_remote_code=True, hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501 - "Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"), # noqa: E501 + "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"), # noqa: E501 "Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air", is_available_online=False), # noqa: E501 "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m", diff --git a/tests/tokenization/test_cached_tokenizer.py b/tests/tokenization/test_cached_tokenizer.py index e218678c4363..07217611ea4d 100644 --- a/tests/tokenization/test_cached_tokenizer.py +++ b/tests/tokenization/test_cached_tokenizer.py @@ -10,7 +10,7 @@ from vllm.transformers_utils.tokenizer import (AnyTokenizer, get_cached_tokenizer) -@pytest.mark.parametrize("model_id", ["gpt2", "THUDM/chatglm3-6b"]) +@pytest.mark.parametrize("model_id", ["gpt2", "zai-org/chatglm3-6b"]) def test_cached_tokenizer(model_id: str): reference_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 129f0942f14e..5470ff3e8b61 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project # Adapted from -# https://github.com/THUDM/ChatGLM2-6B +# https://github.com/zai-org/ChatGLM2-6B """Inference-only ChatGLM model compatible with THUDM weights.""" import json from collections.abc import Iterable @@ -86,10 +86,10 @@ class GLMAttention(nn.Module): prefix=f"{prefix}.dense", ) - # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141 + # https://huggingface.co/zai-org/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141 rope_ratio = getattr(config, "rope_ratio", 1.0) max_positions = getattr(config, "seq_length", 8192) - # NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False, + # NOTE: zai-org/cogagent-9b-20241220 uses original_rope=False, # which is equivalent to is_neox_style=True is_neox_style = not config.original_rope self.rotary_emb = get_rope( diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py index 537aeabf72d5..1751fccd08b0 100644 --- a/vllm/model_executor/models/glm4v.py +++ b/vllm/model_executor/models/glm4v.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project # Adapted from -# https://github.com/THUDM/CogAgent +# https://github.com/zai-org/CogAgent """Inference-only CogAgent model compatible with THUDM weights.""" from argparse import Namespace from collections.abc import Mapping, Sequence diff --git a/vllm/test_utils.py b/vllm/test_utils.py index 1e61ca6b3dea..23679b8228d6 100644 --- a/vllm/test_utils.py +++ b/vllm/test_utils.py @@ -118,7 +118,7 @@ MODELS_ON_S3 = [ "stabilityai/stablelm-zephyr-3b", "state-spaces/mamba-130m-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", - "THUDM/glm-4v-9b", + "zai-org/glm-4v-9b", "TIGER-Lab/Mantis-8B-siglip-llama3", "TIGER-Lab/VLM2Vec-Full", "tiiuae/falcon-40b", diff --git a/vllm/transformers_utils/configs/chatglm.py b/vllm/transformers_utils/configs/chatglm.py index 7c5de3e948ed..176d2b8f63fe 100644 --- a/vllm/transformers_utils/configs/chatglm.py +++ b/vllm/transformers_utils/configs/chatglm.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project # Adapted from -# https://github.com/THUDM/ChatGLM2-6B +# https://github.com/zai-org/ChatGLM2-6B from transformers import PretrainedConfig diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 6a31a4198069..d2be2ceeeae6 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -271,7 +271,7 @@ def get_tokenizer( } tokenizer.add_special_tokens(special_tokens_map) - # NOTE: We can remove this after https://github.com/THUDM/ChatGLM3/issues/1324 + # NOTE: We can remove this after https://github.com/zai-org/ChatGLM3/issues/1324 if type(tokenizer).__name__ in ("ChatGLMTokenizer", "ChatGLM4Tokenizer"): assert isinstance(tokenizer, PreTrainedTokenizer)