From 0fa673af4c2aeaf6d21a0163dc55218f5ee1daa6 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Fri, 26 Sep 2025 02:12:33 +0800
Subject: [PATCH] [V0 deprecation] Clean up LoRA (#25686)

Signed-off-by: Jee Jee Li
---
 vllm/lora/punica_wrapper/punica_gpu.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/vllm/lora/punica_wrapper/punica_gpu.py b/vllm/lora/punica_wrapper/punica_gpu.py
index 2db0e9fee142..467f50050eb2 100644
--- a/vllm/lora/punica_wrapper/punica_gpu.py
+++ b/vllm/lora/punica_wrapper/punica_gpu.py
@@ -11,7 +11,6 @@
 from typing import Optional, Union, final
 
 import torch
 
-import vllm.envs as envs
 from vllm.lora.layers import LoRAMapping
 from vllm.triton_utils import HAS_TRITON
 
@@ -41,14 +40,8 @@ class PunicaWrapperGPU(PunicaWrapperBase):
                                                       max_num_batched_tokens,
                                                       device=device)
 
-        # When cudagraph capture size is greater than max_num_seqs (max_batches,
-        # here), V0 captures the graph as if max_num_seqs is set to
-        # the capture size.
-        # V1 doesn't have this problem and always respects max_num_seqs.
-        max_num_prompts = (max_batches
-                           if envs.VLLM_USE_V1 else max_num_batched_tokens)
         self.prompt_mapping_meta = LoRAKernelMeta.make(self.max_loras,
-                                                       max_num_prompts,
+                                                       max_batches,
                                                        device=device)
 
     def update_metadata(self, mapping: LoRAMapping,
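
Note: the sizing decision this patch removes can be summarized with a minimal sketch. The function names below (prompt_meta_size_before, prompt_meta_size_after) and the use_v1 flag are illustrative stand-ins, not vLLM APIs; only max_batches (max_num_seqs) and max_num_batched_tokens come from the diff above.

    # Sketch of the buffer-sizing decision this patch simplifies.
    # Names are illustrative stand-ins; only the two size parameters
    # correspond to the diff above.

    def prompt_meta_size_before(max_batches: int,
                                max_num_batched_tokens: int,
                                use_v1: bool) -> int:
        # Pre-patch: when the cudagraph capture size exceeded max_num_seqs,
        # V0 captured the graph as if max_num_seqs equaled the capture size,
        # so the prompt-level LoRA metadata was over-allocated to
        # max_num_batched_tokens on V0.
        return max_batches if use_v1 else max_num_batched_tokens

    def prompt_meta_size_after(max_batches: int) -> int:
        # Post-patch: with V0 gone, V1 always respects max_num_seqs, so the
        # prompt-level LoRA metadata is always sized to max_batches.
        return max_batches

    if __name__ == "__main__":
        # Example: 256 sequences max, 8192 batched tokens max.
        assert prompt_meta_size_before(256, 8192, use_v1=True) == 256
        assert prompt_meta_size_before(256, 8192, use_v1=False) == 8192
        assert prompt_meta_size_after(256) == 256

With the V0 branch deleted, the envs import and the max_num_prompts intermediate are no longer needed, which is why the patch nets out to one insertion and eight deletions.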