Remove wrong GLM-4.5 quantization code (#21435)

Yuxuan Zhang 2025-07-24 16:52:43 +08:00 committed by GitHub
parent 610852a423
commit 85bda9e7d0
3 changed files with 2 additions and 3 deletions


@@ -20,7 +20,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
logger = init_logger(__name__)
-@ToolParserManager.register_module("glm4_moe")
+@ToolParserManager.register_module("glm45")
class Glm4MoeModelToolParser(ToolParser):

    def __init__(self, tokenizer: AnyTokenizer):
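
With this hunk the GLM-4.5 tool parser is registered under the name "glm45" rather than "glm4_moe". As a rough usage sketch (not part of this commit): a server started with `vllm serve <model> --enable-auto-tool-choice --tool-call-parser glm45` exposes tool calling through the OpenAI-compatible API, and a client can exercise the renamed parser as below. The endpoint, model name, and tool schema are illustrative assumptions; only the parser name comes from this diff.

# Hypothetical client-side sketch; only the "glm45" parser name is taken from this commit.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Illustrative tool definition; any function schema works here.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

# The model name is an assumption; use whatever GLM-4.5 checkpoint the server loads.
response = client.chat.completions.create(
    model="zai-org/GLM-4.5",
    messages=[{"role": "user", "content": "What is the weather in Beijing?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)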


@@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module):
            self.embed_tokens = VocabParallelEmbedding(
                config.vocab_size,
                config.hidden_size,
-               quant_config=quant_config,
                prefix=f"{prefix}.embed_tokens")
        else:
            self.embed_tokens = PPMissingLayer()
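
The hunk above drops quant_config from the embedding constructor, so the vocabulary embedding is no longer quantized along with the rest of the model. A minimal self-contained sketch of the resulting pattern, assuming vLLM's VocabParallelEmbedding, PPMissingLayer, and get_pp_group behave as referenced in the diff; the wrapper class, config fields, and the first-rank guard are illustrative, not copied from the real Glm4MoeModel:

import torch.nn as nn

from vllm.distributed import get_pp_group
from vllm.model_executor.layers.vocab_parallel_embedding import (
    VocabParallelEmbedding)
from vllm.model_executor.models.utils import PPMissingLayer


class EmbeddingSketch(nn.Module):
    """Illustrative wrapper, not the actual Glm4MoeModel class."""

    def __init__(self, config, quant_config=None, prefix: str = "model"):
        super().__init__()
        # The exact guard in the real model may differ; a first-rank check is assumed.
        if get_pp_group().is_first_rank:
            # The embedding is built WITHOUT quant_config; passing it here was
            # the wrong quantization code this commit removes.
            self.embed_tokens = VocabParallelEmbedding(
                config.vocab_size,
                config.hidden_size,
                prefix=f"{prefix}.embed_tokens")
        else:
            # Non-first pipeline ranks keep a placeholder layer.
            self.embed_tokens = PPMissingLayer()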


@@ -14,7 +14,7 @@ from vllm.reasoning import ReasoningParser, ReasoningParserManager
logger = init_logger(__name__)
-@ReasoningParserManager.register_module("glm4_moe")
+@ReasoningParserManager.register_module("glm45")
class Glm4MoeModelReasoningParser(ReasoningParser):
    """
    Reasoning parser for the Glm4MoeModel model.