mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-05 05:24:37 +08:00
Fix vLLM x torch.compile config caching (#16491)
Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
parent
b4fe16c75b
commit
b590adfdc1
@@ -298,12 +298,18 @@ class ModelConfig:
|
|||||||
factors.append(self.quantization)
|
factors.append(self.quantization)
|
||||||
factors.append(self.revision)
|
factors.append(self.revision)
|
||||||
factors.append(self.code_revision)
|
factors.append(self.code_revision)
|
||||||
|
factors.append(self.max_model_len)
|
||||||
|
factors.append(self.max_logprobs)
|
||||||
|
factors.append(self.disable_sliding_window)
|
||||||
factors.append(self.trust_remote_code)
|
factors.append(self.trust_remote_code)
|
||||||
|
factors.append(self.mm_processor_kwargs)
|
||||||
|
factors.append(self.generation_config)
|
||||||
|
factors.append(self.model_impl)
|
||||||
|
factors.append(self.override_generation_config)
|
||||||
factors.append(self.rope_scaling)
|
factors.append(self.rope_scaling)
|
||||||
factors.append(self.rope_theta)
|
factors.append(self.rope_theta)
|
||||||
# rope cos/sin cache depends on the max_position_embeddings
|
# hf_config can control how the model looks!
|
||||||
factors.append(
|
factors.append(self.hf_config.to_json_string())
|
||||||
getattr(self.hf_config, "max_position_embeddings", "None"))
|
|
||||||
return hashlib.sha256(str(factors).encode()).hexdigest()
|
return hashlib.sha256(str(factors).encode()).hexdigest()
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user