mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 15:07:08 +08:00
hacks
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
parent
a0efd3106c
commit
95c40f9b09
@@ -635,7 +635,8 @@ def batched_moe_kernel_quantize_input(
|
|||||||
per_channel_quant: bool,
|
per_channel_quant: bool,
|
||||||
block_shape: Optional[list[int]] = None,
|
block_shape: Optional[list[int]] = None,
|
||||||
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
|
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
|
||||||
if (torch.compiler.is_compiling()
|
if (True or
|
||||||
|
torch.compiler.is_compiling()
|
||||||
or torch.cuda.is_current_stream_capturing()):
|
or torch.cuda.is_current_stream_capturing()):
|
||||||
# Note: this does a bunch of extra work because expert_num_tokens is ignored
|
# Note: this does a bunch of extra work because expert_num_tokens is ignored
|
||||||
# but it does support torch.compile + cudagraphs.
|
# but it does support torch.compile + cudagraphs.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user