mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-28 10:39:11 +08:00
update with main
Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
This commit is contained in:
parent
9a75278f2a
commit
0f85cfdc65
@ -26,6 +26,9 @@ class ModelArchConfigConvertorBase:
|
||||
self.hf_config = hf_config
|
||||
self.hf_text_config = hf_text_config
|
||||
|
||||
def get_architectures(self) -> list[str]:
    """Return the architecture names declared on ``self.hf_config``.

    Returns:
        The value of ``hf_config.architectures``. An empty list is
        returned when the attribute is missing *or* is present but falsy
        (for example ``None`` on a config whose architectures were never
        populated), so callers always receive a list as annotated.
    """
    # getattr's default only covers a missing attribute; `or []` also
    # normalises an explicit None so the declared return type holds.
    return getattr(self.hf_config, "architectures", None) or []
|
||||
|
||||
def get_num_hidden_layers(self) -> int:
    """Return ``num_hidden_layers`` from the text config.

    Returns:
        The ``num_hidden_layers`` attribute of ``self.hf_text_config``,
        or ``0`` when that attribute is not defined.
    """
    text_config = self.hf_text_config
    # Fall back to 0 when the text config does not define the attribute.
    layer_count = getattr(text_config, "num_hidden_layers", 0)
    return layer_count
|
||||
|
||||
@ -240,7 +243,7 @@ class ModelArchConfigConvertorBase:
|
||||
|
||||
def convert(self, model_id: str, revision: str | None) -> ModelArchitectureConfig:
|
||||
model_arch_config = ModelArchitectureConfig(
|
||||
architectures=getattr(self.hf_config, "architectures", []),
|
||||
architectures=self.get_architectures(),
|
||||
model_type=self.hf_config.model_type,
|
||||
text_model_type=getattr(self.hf_text_config, "model_type", None),
|
||||
hidden_size=self.get_hidden_size(),
|
||||
@ -331,7 +334,15 @@ class NemotronNasModelArchConfigConvertor(ModelArchConfigConvertorBase):
|
||||
self.hf_text_config.num_attention_heads
|
||||
// block.attention.n_heads_in_group
|
||||
)
|
||||
raise RuntimeError("Couldn't determine number of kv heads")
|
||||
raise RuntimeError(
|
||||
"Could not determine the number of key-value attention heads "
|
||||
"from model configuration. "
|
||||
f"Architecture: {self.get_architectures()}. "
|
||||
"This usually indicates an unsupported model architecture or "
|
||||
"missing configuration. "
|
||||
"Please check if your model is supported at: "
|
||||
"https://docs.vllm.ai/en/latest/models/supported_models.html"
|
||||
)
|
||||
|
||||
|
||||
class DeepSeekMTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user