Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 19:06:17 +08:00)
[BugFix] Raise error when max_model_len is larger than KV cache (#2163)
This commit is contained in:
parent 3ec8c25cd0
commit 8041b7305e
@@ -227,6 +227,14 @@ class LLMEngine:
             raise ValueError("No available memory for the cache blocks. "
                              "Try increasing `gpu_memory_utilization` when "
                              "initializing the engine.")
+        max_seq_len = self.cache_config.block_size * num_gpu_blocks
+        if self.model_config.max_model_len > max_seq_len:
+            raise ValueError(
+                f"The model's max seq len ({self.model_config.max_model_len}) "
+                "is larger than the maximum number of tokens that can be "
+                f"stored in KV cache ({max_seq_len}). Try increasing "
+                "`gpu_memory_utilization` or decreasing `max_model_len` when "
+                "initializing the engine.")
 
         self.cache_config.num_gpu_blocks = num_gpu_blocks
         self.cache_config.num_cpu_blocks = num_cpu_blocks
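For illustration, a minimal standalone sketch of the check this commit introduces. This is not vLLM's actual API: the helper name check_kv_cache_fits and the numbers used below (block_size=16, num_gpu_blocks=512, max_model_len=16384) are hypothetical example values, chosen only to show the arithmetic behind the new error.

# Sketch of the new check; names and numbers are illustrative, not vLLM's
# real configuration objects.
def check_kv_cache_fits(max_model_len: int, block_size: int,
                        num_gpu_blocks: int) -> None:
    # Each KV-cache block holds `block_size` tokens, so a single sequence can
    # occupy at most block_size * num_gpu_blocks tokens of cache.
    max_seq_len = block_size * num_gpu_blocks
    if max_model_len > max_seq_len:
        raise ValueError(
            f"The model's max seq len ({max_model_len}) is larger than the "
            f"maximum number of tokens that can be stored in KV cache "
            f"({max_seq_len}). Try increasing `gpu_memory_utilization` or "
            f"decreasing `max_model_len` when initializing the engine.")

# 512 blocks of 16 tokens each store at most 8192 tokens:
check_kv_cache_fits(max_model_len=8192, block_size=16, num_gpu_blocks=512)   # passes
check_kv_cache_fits(max_model_len=16384, block_size=16, num_gpu_blocks=512)  # raises ValueError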