From 89fa54e6f7a0789161ab73f604fdec5be6b57e58 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 26 Sep 2025 05:54:20 +0800
Subject: [PATCH] [Optimization] Use a cheaper cache key in `get_model_architecture` (#25682)

Signed-off-by: DarkLight1337
---
 vllm/model_executor/model_loader/utils.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py
index e007d431880e..03202e13c280 100644
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -165,7 +165,7 @@ def device_loading_context(module: torch.nn.Module,
         # New parameters or parameters already on target device are untouched
 
 
-_MODEL_ARCH_BY_HASH = dict[str, tuple[type[nn.Module], str]]()
+_MODEL_ARCH_BY_HASH = dict[int, tuple[type[nn.Module], str]]()
 """Caches the outputs of `_get_model_architecture`."""
 
 
@@ -215,7 +215,14 @@ def _get_model_architecture(
 
 def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
-    key = model_config.compute_hash()
+    key = hash((
+        model_config.model,
+        model_config.convert_type,
+        model_config.runner_type,
+        model_config.trust_remote_code,
+        model_config.model_impl,
+        tuple(getattr(model_config.hf_config, "architectures", [])),
+    ))
     if key in _MODEL_ARCH_BY_HASH:
         return _MODEL_ARCH_BY_HASH[key]
 