mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:35:28 +08:00
[Optimization] Use a cheaper cache key in get_model_architecture (#25682)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
3d54bdcb73
commit
89fa54e6f7
@@ -165,7 +165,7 @@ def device_loading_context(module: torch.nn.Module,
     # New parameters or parameters already on target device are untouched


-_MODEL_ARCH_BY_HASH = dict[str, tuple[type[nn.Module], str]]()
+_MODEL_ARCH_BY_HASH = dict[int, tuple[type[nn.Module], str]]()
 """Caches the outputs of `_get_model_architecture`."""

@@ -215,7 +215,14 @@ def _get_model_architecture(


 def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
-    key = model_config.compute_hash()
+    key = hash((
+        model_config.model,
+        model_config.convert_type,
+        model_config.runner_type,
+        model_config.trust_remote_code,
+        model_config.model_impl,
+        tuple(getattr(model_config.hf_config, "architectures", [])),
+    ))
     if key in _MODEL_ARCH_BY_HASH:
         return _MODEL_ARCH_BY_HASH[key]
Loading…
x
Reference in New Issue
Block a user