diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py
index eacc6fb46ebd7..e41ae1d9594a7 100644
--- a/vllm/lora/ops/triton_ops/lora_expand_op.py
+++ b/vllm/lora/ops/triton_ops/lora_expand_op.py
@@ -204,7 +204,6 @@ def _lora_expand(
     NUM_WARPS = 4
     NUM_CTAS = 1
     NUM_STAGES = 2
-    MAX_NREG = None
 
     EVEN_K = K % BLOCK_K == 0  # type: ignore
 
@@ -258,7 +257,6 @@ def _lora_expand(
         num_warps=NUM_WARPS,
         num_ctas=NUM_CTAS,
         num_stages=NUM_STAGES,
-        maxnreg=MAX_NREG,
     )
     return
 
diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py
index 82331939d859b..fb0422cf0b0ee 100644
--- a/vllm/lora/ops/triton_ops/lora_shrink_op.py
+++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py
@@ -168,7 +168,6 @@ def _lora_shrink(
     NUM_WARPS = 4
     NUM_CTAS = 1
     NUM_STAGES = 2
-    MAX_NREG = None
 
     EVEN_K = K % (BLOCK_K * SPLIT_K) == 0  # type: ignore
 
@@ -213,7 +212,6 @@ def _lora_shrink(
         num_warps=NUM_WARPS,
         num_ctas=NUM_CTAS,
         num_stages=NUM_STAGES,
-        maxnreg=MAX_NREG,
     )
     return
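
For context, the snippet below is a minimal, self-contained sketch of the Triton launch pattern this patch touches; the kernel name, tensors, and block size are illustrative and not part of the vLLM code. It shows that tuning options such as num_warps, num_ctas, and num_stages are passed as keyword arguments at launch time, and that after this change maxnreg is simply not passed at all rather than being forwarded as None.

# Sketch only: mirrors the launch-option style used by _lora_expand/_lora_shrink,
# not the actual vLLM kernels.
import torch
import triton
import triton.language as tl


@triton.jit
def _copy_kernel(src_ptr, dst_ptr, n_elements, BLOCK: tl.constexpr):
    # Each program copies one BLOCK-sized chunk, masking the tail.
    pid = tl.program_id(0)
    offs = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n_elements
    tl.store(dst_ptr + offs, tl.load(src_ptr + offs, mask=mask), mask=mask)


def launch_copy(src: torch.Tensor, dst: torch.Tensor) -> None:
    n = src.numel()
    BLOCK = 128
    grid = (triton.cdiv(n, BLOCK),)
    # Launch options follow the kernel arguments; maxnreg is omitted entirely
    # instead of being passed as maxnreg=None.
    _copy_kernel[grid](
        src,
        dst,
        n,
        BLOCK=BLOCK,
        num_warps=4,
        num_ctas=1,
        num_stages=2,
    )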