From b590adfdc15fc716f6d120aeefeb587f491f8fce Mon Sep 17 00:00:00 2001
From: Richard Zou
Date: Tue, 15 Apr 2025 02:11:11 -0400
Subject: [PATCH] Fix vLLM x torch.compile config caching (#16491)

Signed-off-by: rzou

---
 vllm/config.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index f86c3272a0ad5..60ea4a517bde9 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -298,12 +298,18 @@ class ModelConfig:
         factors.append(self.quantization)
         factors.append(self.revision)
         factors.append(self.code_revision)
+        factors.append(self.max_model_len)
+        factors.append(self.max_logprobs)
+        factors.append(self.disable_sliding_window)
         factors.append(self.trust_remote_code)
+        factors.append(self.mm_processor_kwargs)
+        factors.append(self.generation_config)
+        factors.append(self.model_impl)
+        factors.append(self.override_generation_config)
         factors.append(self.rope_scaling)
         factors.append(self.rope_theta)
-        # rope cos/sin cache depends on the max_position_embeddings
-        factors.append(
-            getattr(self.hf_config, "max_position_embeddings", "None"))
+        # hf_config can control how the model looks!
+        factors.append(self.hf_config.to_json_string())
         return hashlib.sha256(str(factors).encode()).hexdigest()
 
     def __init__(
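
Note: the factors list above feeds a SHA-256 digest that keys the torch.compile
cache, so any config field that can change the compiled model's behavior must be
included; the fix widens the factor set and hashes the full hf_config JSON instead
of a single attribute. Below is a minimal, self-contained sketch of that pattern
for illustration only; DummyHFConfig and DummyModelConfig are hypothetical
stand-ins, not vLLM's real classes.

import hashlib
import json
from dataclasses import dataclass, field

@dataclass
class DummyHFConfig:
    max_position_embeddings: int = 4096
    hidden_size: int = 1024

    def to_json_string(self) -> str:
        # Serialize every field, so *any* hf_config change alters the hash.
        return json.dumps(self.__dict__, sort_keys=True)

@dataclass
class DummyModelConfig:
    model: str = "some-model"
    dtype: str = "bfloat16"
    max_model_len: int = 8192
    hf_config: DummyHFConfig = field(default_factory=DummyHFConfig)

    def compute_hash(self) -> str:
        factors = [
            self.model,
            self.dtype,
            self.max_model_len,
            # Hash the full HF config JSON rather than one attribute,
            # mirroring the change in the patch above.
            self.hf_config.to_json_string(),
        ]
        return hashlib.sha256(str(factors).encode()).hexdigest()

cfg = DummyModelConfig()
print(cfg.compute_hash())
# Changing any factor changes the cache key, forcing a recompile
# instead of silently reusing a stale compiled artifact:
cfg.hf_config.max_position_embeddings = 8192
print(cfg.compute_hash())  # different digest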