mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 00:49:09 +08:00
fix(tests): Ensure reliable CUDA cache clearing in MoE test (#23416)
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
424fb7a5d2
commit
341923b982
@@ -429,11 +429,11 @@ def test_mixtral_moe(dtype: torch.dtype, padding: bool, use_rocm_aiter: bool,
|
|||||||
vllm_moe.experts.w13_weight, (0, 128), "constant", 0)[...,
|
vllm_moe.experts.w13_weight, (0, 128), "constant", 0)[...,
|
||||||
0:-128],
|
0:-128],
|
||||||
requires_grad=False)
|
requires_grad=False)
|
||||||
torch.cuda.empty_cache()
|
|
||||||
vllm_moe.experts.w2_weight = Parameter(F.pad(
|
vllm_moe.experts.w2_weight = Parameter(F.pad(
|
||||||
vllm_moe.experts.w2_weight, (0, 128), "constant", 0)[...,
|
vllm_moe.experts.w2_weight, (0, 128), "constant", 0)[...,
|
||||||
0:-128],
|
0:-128],
|
||||||
requires_grad=False)
|
requires_grad=False)
|
||||||
|
torch.cuda.synchronize()
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
# Run forward passes for both MoE blocks
|
# Run forward passes for both MoE blocks
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user