Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
Bill Nell 2025-05-30 02:33:58 +00:00
parent a0efd3106c
commit 95c40f9b09

View File

@@ -635,7 +635,8 @@ def batched_moe_kernel_quantize_input(
per_channel_quant: bool,
block_shape: Optional[list[int]] = None,
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
if (torch.compiler.is_compiling()
if (True or
torch.compiler.is_compiling()
or torch.cuda.is_current_stream_capturing()):
# Note: this does a bunch of extra work because expert_num_tokens is ignored
# but it does support torch.compile + cudagraphs.