From 03b41b6cad7684cf86fadbda2501109422db53bd Mon Sep 17 00:00:00 2001
From: Bill Nell
Date: Wed, 28 May 2025 23:29:30 +0000
Subject: [PATCH] fix merge

Signed-off-by: Bill Nell
---
 tests/kernels/moe/test_pplx_moe.py                        | 6 +++++-
 vllm/model_executor/layers/fused_moe/fused_batched_moe.py | 3 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/kernels/moe/test_pplx_moe.py b/tests/kernels/moe/test_pplx_moe.py
index 8c4a2c3fa440f..c10c5ba8127a9 100644
--- a/tests/kernels/moe/test_pplx_moe.py
+++ b/tests/kernels/moe/test_pplx_moe.py
@@ -63,7 +63,6 @@ requires_pplx = pytest.mark.skipif(
     reason="Requires PPLX kernels",
 )
 
-
 @dataclasses.dataclass
 class ProcessGroupInfo:
     world_size: int
@@ -74,6 +73,11 @@ class ProcessGroupInfo:
     device: torch.device
 
 
+@pytest.fixture(scope="function", autouse=True)
+def use_pplx_backend(monkeypatch):
+    monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "pplx")
+
+
 def _worker_parallel_launch(
     local_rank: int,
     world_size: int,
diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
index 8c575958b5b1b..c27333f4e704e 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -429,8 +429,6 @@ class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                 "apply_router_weight_on_input is only implemented for topk=1"
             a1.mul_(topk_weights.to(a1.dtype))
 
-        _, block_k = self.block_shape
-
         num_tokens, hidden_dim = a1.size()
         topk = topk_ids.size(1)
 
@@ -453,6 +451,7 @@ class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
             device=a1.device)
 
         if self.qtype is not None:
+            _, block_k = self.block_shape
             k_tiles = (hidden_dim + block_k - 1) // block_k
             b_a1_scale = torch.zeros(
                 (num_local_experts, self.max_num_tokens, k_tiles),