mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-11 21:07:10 +08:00
hacks
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
parent
a0efd3106c
commit
95c40f9b09
@@ -635,7 +635,8 @@ def batched_moe_kernel_quantize_input(
     per_channel_quant: bool,
     block_shape: Optional[list[int]] = None,
 ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
-    if (torch.compiler.is_compiling()
+    if (True or
+            torch.compiler.is_compiling()
             or torch.cuda.is_current_stream_capturing()):
         # Note: this does a bunch of extra work because expert_num_tokens is ignored
         # but it does support torch.compile + cudagraphs.
Loading…
x
Reference in New Issue
Block a user