mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 04:35:01 +08:00
[ROCm][CI] Increase the memory threshold for test_deep_sleep_fp8_kvcache (#30104)
Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
parent
5867819eaf
commit
2c22c4ca2d
@@ -260,13 +260,18 @@ def test_deep_sleep_fp8_kvcache():
|
|||||||
llm.sleep(level=2)
|
llm.sleep(level=2)
|
||||||
|
|
||||||
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
|
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
|
||||||
assert used_bytes < 3 * GiB_bytes
|
|
||||||
|
# ROCm uses more memory for CUDA graphs, so we add 2 GiB to the threshold
|
||||||
|
rocm_extra_mem_bytes = 2 * GiB_bytes if current_platform.is_rocm() else 0
|
||||||
|
mem_threshold_after_sleep = 3 * GiB_bytes + rocm_extra_mem_bytes
|
||||||
|
assert used_bytes < mem_threshold_after_sleep
|
||||||
|
|
||||||
llm.wake_up(tags=["weights"])
|
llm.wake_up(tags=["weights"])
|
||||||
llm.collective_rpc("reload_weights")
|
llm.collective_rpc("reload_weights")
|
||||||
|
|
||||||
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
|
used_bytes = current_platform.get_current_memory_usage() - used_bytes_baseline
|
||||||
assert used_bytes < 4 * GiB_bytes
|
mem_threshold_after_wake_up = 4 * GiB_bytes + rocm_extra_mem_bytes
|
||||||
|
assert used_bytes < mem_threshold_after_wake_up
|
||||||
|
|
||||||
# now allocate kv cache and cuda graph memory
|
# now allocate kv cache and cuda graph memory
|
||||||
llm.wake_up(tags=["kv_cache"])
|
llm.wake_up(tags=["kv_cache"])
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user