Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-13 21:05:01 +08:00)

remove GLM-4.5 quantization wrong Code (#21435)

commit 85bda9e7d0 (parent 610852a423)
@@ -20,7 +20,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
 logger = init_logger(__name__)


-@ToolParserManager.register_module("glm4_moe")
+@ToolParserManager.register_module("glm45")
 class Glm4MoeModelToolParser(ToolParser):

     def __init__(self, tokenizer: AnyTokenizer):
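The hunk above only renames the string under which the GLM-4.5 tool parser is registered, from "glm4_moe" to "glm45"; the class body is unchanged. As a rough illustration of the registry pattern the decorator implies (a simplified sketch, not vLLM's actual ToolParserManager implementation), the registered name is just a dictionary key:

    # Hypothetical, simplified name -> class registry; not vLLM's real code.
    class ToolParserManager:
        _registry: dict[str, type] = {}

        @classmethod
        def register_module(cls, name: str):
            # Used as @ToolParserManager.register_module("glm45"):
            # binds `name` to the decorated parser class.
            def decorator(parser_cls: type) -> type:
                cls._registry[name] = parser_cls
                return parser_cls
            return decorator

        @classmethod
        def get_parser(cls, name: str) -> type:
            # Raises KeyError for names that were never registered,
            # which is why a stale name like "glm4_moe" stops resolving.
            return cls._registry[name]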
@@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module):
             self.embed_tokens = VocabParallelEmbedding(
                 config.vocab_size,
                 config.hidden_size,
-                quant_config=quant_config,
                 prefix=f"{prefix}.embed_tokens")
         else:
             self.embed_tokens = PPMissingLayer()
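This is the "wrong Code" of the commit title: the model-wide quant_config was being forwarded to the embedding layer, which steers the layer onto a quantized weight-loading path even though the embedding weights in GLM-4.5 checkpoints are presumably stored in full precision. A hedged sketch of the mechanism (hypothetical and simplified; not vLLM's actual VocabParallelEmbedding):

    # Hypothetical illustration of why the argument matters;
    # not vLLM's real layer code.
    from typing import Optional

    class SketchVocabEmbedding:
        def __init__(self, vocab_size: int, hidden_size: int,
                     quant_config: Optional[object] = None):
            if quant_config is not None:
                # Quantized path: the loader now expects quantized
                # tensors (packed weights, scales) for this layer.
                self.load_format = "quantized"
            else:
                # Default path: plain fp16/bf16 embedding weights,
                # matching what the checkpoint actually contains.
                self.load_format = "full-precision"

Dropping the argument leaves the embedding on the default full-precision path while the rest of the model still honors quant_config.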
@@ -14,7 +14,7 @@ from vllm.reasoning import ReasoningParser, ReasoningParserManager
 logger = init_logger(__name__)


-@ReasoningParserManager.register_module("glm4_moe")
+@ReasoningParserManager.register_module("glm45")
 class Glm4MoeModelReasoningParser(ReasoningParser):
     """
     Reasoning parser for the Glm4MoeModel model.
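With both decorators renamed, the GLM-4.5 reasoning and tool parsers are selected under the name "glm45". A hedged usage example (these flags exist in vLLM's OpenAI-compatible server; the model path is a placeholder):

    vllm serve <glm-4.5-checkpoint> \
        --reasoning-parser glm45 \
        --enable-auto-tool-choice \
        --tool-call-parser glm45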