mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-02 12:58:02 +08:00
[Bugfix] Fix cpu usage and cache hit stats reporting on cpu environment (#18674)
Signed-off-by: zzzyq <zhangyuqi94@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
parent
279f854519
commit
f2faac745d
@ -1650,6 +1650,20 @@ class LLMEngine:
|
|||||||
gpu_prefix_cache_hit_rate = self.scheduler[
|
gpu_prefix_cache_hit_rate = self.scheduler[
|
||||||
0].get_prefix_cache_hit_rate(Device.GPU)
|
0].get_prefix_cache_hit_rate(Device.GPU)
|
||||||
|
|
||||||
|
# Exchange the usage and cache hit stats between gpu and cpu when
|
||||||
|
# running on cpu because the cpu_worker.py intentionally reports the
|
||||||
|
# number of cpu blocks as gpu blocks in favor of cache management.
|
||||||
|
if self.device_config.device_type == "cpu":
|
||||||
|
num_total_gpu, num_total_cpu = num_total_cpu, num_total_gpu
|
||||||
|
gpu_cache_usage_sys, cpu_cache_usage_sys = (
|
||||||
|
cpu_cache_usage_sys,
|
||||||
|
gpu_cache_usage_sys,
|
||||||
|
)
|
||||||
|
gpu_prefix_cache_hit_rate, cpu_prefix_cache_hit_rate = (
|
||||||
|
cpu_prefix_cache_hit_rate,
|
||||||
|
gpu_prefix_cache_hit_rate,
|
||||||
|
)
|
||||||
|
|
||||||
# Iteration stats
|
# Iteration stats
|
||||||
num_prompt_tokens_iter = 0
|
num_prompt_tokens_iter = 0
|
||||||
num_generation_tokens_iter = 0
|
num_generation_tokens_iter = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user