Update tpu_worker.py 's typo (#17288)

This commit is contained in:
idouba 2025-04-28 19:01:15 +08:00 committed by GitHub
parent fa93cd9f60
commit 72c5b97231
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -163,8 +163,8 @@ class TPUWorker(LoRANotSupportedWorkerBase, LocalOrDistributedWorkerBase):
usable_memory_size = int(total_memory_size *
self.cache_config.gpu_memory_utilization)
tpu_kv_cache_bytes = max(usable_memory_size - profiled, 0)
dtype_btyes = get_dtype_size(self.cache_dtype)
block_size_bytes = (dtype_btyes * self.cache_config.block_size *
dtype_bytes = get_dtype_size(self.cache_dtype)
block_size_bytes = (dtype_bytes * self.cache_config.block_size *
num_layers * 2 * head_size * num_kv_heads)
num_tpu_blocks = tpu_kv_cache_bytes // block_size_bytes
num_tpu_blocks = (num_tpu_blocks // 8) * 8 # Round down to 8.