From 03b41b6cad7684cf86fadbda2501109422db53bd Mon Sep 17 00:00:00 2001
From: Bill Nell <bnell@redhat.com>
Date: Wed, 28 May 2025 23:29:30 +0000
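Subject: [PATCH] fix merge

Add an autouse, function-scoped pytest fixture to
tests/kernels/moe/test_pplx_moe.py that sets VLLM_ALL2ALL_BACKEND=pplx
for each test in that module.

In fused_batched_moe.py (BatchedPrepareAndFinalize), move the
`_, block_k = self.block_shape` unpacking inside the
`if self.qtype is not None:` branch, next to the k_tiles computation
that consumes block_k.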

Signed-off-by: Bill Nell <bnell@redhat.com>
---
 tests/kernels/moe/test_pplx_moe.py                        | 6 +++++-
 vllm/model_executor/layers/fused_moe/fused_batched_moe.py | 3 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/kernels/moe/test_pplx_moe.py b/tests/kernels/moe/test_pplx_moe.py
index 8c4a2c3fa440f..c10c5ba8127a9 100644
--- a/tests/kernels/moe/test_pplx_moe.py
+++ b/tests/kernels/moe/test_pplx_moe.py
@@ -63,7 +63,6 @@ requires_pplx = pytest.mark.skipif(
     reason="Requires PPLX kernels",
 )
 
-
 @dataclasses.dataclass
 class ProcessGroupInfo:
     world_size: int
@@ -74,6 +73,11 @@ class ProcessGroupInfo:
     device: torch.device
 
 
+@pytest.fixture(scope="function", autouse=True)
+def use_pplx_backend(monkeypatch):
+    monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "pplx")
+
+
 def _worker_parallel_launch(
     local_rank: int,
     world_size: int,
diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
index 8c575958b5b1b..c27333f4e704e 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -429,8 +429,6 @@ class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
                 "apply_router_weight_on_input is only implemented for topk=1"
             a1.mul_(topk_weights.to(a1.dtype))
 
-        _, block_k = self.block_shape
-
         num_tokens, hidden_dim = a1.size()
         topk = topk_ids.size(1)
 
@@ -453,6 +451,7 @@ class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
             device=a1.device)
 
         if self.qtype is not None:
+            _, block_k = self.block_shape
             k_tiles = (hidden_dim + block_k - 1) // block_k
             b_a1_scale = torch.zeros(
                 (num_local_experts, self.max_num_tokens, k_tiles),