hacks: force the torch.compile/cudagraph-compatible branch in batched_moe_kernel_quantize_input

Signed-off-by: Bill Nell <bnell@redhat.com>
2026-07-11 20:57:21 +08:00 · 2025-05-30 02:33:58 +00:00 · 2025-05-30 02:33:58 +00:00 · 95c40f9b09
commit 95c40f9b09
parent a0efd3106c
1 changed file with 2 additions and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -635,7 +635,8 @@ def batched_moe_kernel_quantize_input(
    per_channel_quant: bool,
    block_shape: Optional[list[int]] = None,
 ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-    if (torch.compiler.is_compiling()
+    if (True or
+        torch.compiler.is_compiling()
        or torch.cuda.is_current_stream_capturing()):
        # Note: this does a bunch of extra work because expert_num_tokens is ignored
        # but it does support torch.compile + cudagraphs.