From 66233af7b6e4217653f1a9952180d68376af7d2a Mon Sep 17 00:00:00 2001
From: Zhanwen Chen <phil.zhanwen.chen@gmail.com>
Date: Tue, 4 Mar 2025 00:09:22 -0500
Subject: [PATCH] Use math.prod instead of np.prod for trivial ops (#14142)

---
 vllm/worker/cache_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py
index 3960392cf74ef..004b4e4b757fd 100644
--- a/vllm/worker/cache_engine.py
+++ b/vllm/worker/cache_engine.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 """CacheEngine class for managing the KV cache."""
+from math import prod
 from typing import List
 
-import numpy as np
 import torch
 
 from vllm import envs
@@ -90,7 +90,7 @@ class CacheEngine:
             # NOTE this assumption currently only holds for MLA so we only apply
             # this optimization when `use_mla` is true
             entry_shape = kv_cache_shape[2:]
-            entry_size = np.prod(entry_shape)
+            entry_size = prod(entry_shape)
             alloc_entry_size = align_to_256bytes(entry_size, self.dtype)
             alloc_shape = (*kv_cache_shape[:2], alloc_entry_size)
         else: