From 5d91d2b292be9b1d6b121d36d242d5077a031e4b Mon Sep 17 00:00:00 2001
From: maang-h <55082429+maang-h@users.noreply.github.com>
Date: Wed, 3 Dec 2025 07:23:09 +0800
Subject: [PATCH] [Doc] Add allocate_slots parameter docs (#29777)

Signed-off-by: maang <maang_h@163.com>
Signed-off-by: maang-h <55082429+maang-h@users.noreply.github.com>
Co-authored-by: Chen Zhang <zhangch99@outlook.com>
---
 vllm/v1/core/kv_cache_manager.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index 3823384881cd3..33e8c81514c5f 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -230,6 +230,9 @@ class KVCacheManager:
             delay_cache_blocks: Whether to skip caching the blocks. This is
                 used by P/D when allocating blocks used in a KV transfer
                 which will complete in a future step.
+            num_encoder_tokens: The number of encoder tokens to allocate for
+                cross-attention in encoder-decoder models(e.g., Whisper).
+                For decoder-only models, this should be 0.
 
         Blocks layout:
         ```