mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 22:25:32 +08:00
disable graph partition in custom op (#26952)
Signed-off-by: Boyuan Feng <boyuan@meta.com> Signed-off-by: Boyuan Feng <fby.1994@gmail.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
parent
ab81379ea6
commit
08405609cc
@ -49,6 +49,7 @@ from vllm.model_executor.layers.fused_moe.utils import (
|
||||
from vllm.model_executor.layers.quantization.utils.mxfp4_utils import dequant_mxfp4
|
||||
from vllm.model_executor.layers.quantization.utils.mxfp6_utils import dequant_mxfp6
|
||||
from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Scheme
|
||||
from vllm.model_executor.utils import maybe_disable_graph_partition
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import tl, triton
|
||||
from vllm.utils import direct_register_custom_op, is_torch_equal_or_newer
|
||||
@ -1145,7 +1146,11 @@ def fused_topk_bias(
|
||||
|
||||
|
||||
# This is used by the Deepseek-V2 and Deepseek-V3 models
|
||||
@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
|
||||
@torch.compile(
|
||||
dynamic=True,
|
||||
backend=current_platform.simple_compile_backend,
|
||||
options=maybe_disable_graph_partition(current_platform.simple_compile_backend),
|
||||
)
|
||||
def grouped_topk(
|
||||
hidden_states: torch.Tensor,
|
||||
gating_output: torch.Tensor,
|
||||
|
||||
@ -7,6 +7,8 @@ from typing import Any
|
||||
|
||||
import torch
|
||||
|
||||
from vllm.utils import is_torch_equal_or_newer
|
||||
|
||||
|
||||
def set_random_seed(seed: int) -> None:
|
||||
from vllm.platforms import current_platform
|
||||
@ -83,3 +85,10 @@ def get_moe_expert_mapping(
|
||||
if child_map is not None:
|
||||
return child_map()
|
||||
return []
|
||||
|
||||
|
||||
def maybe_disable_graph_partition(current_backend: str) -> dict[str, bool]:
    """Return compile options that turn graph partition off when applicable.

    The result is meant to be passed as the ``options`` argument of
    ``torch.compile``.

    Args:
        current_backend: Name of the compile backend being used.

    Returns:
        ``{"graph_partition": False}`` when the backend is ``"inductor"`` and
        ``is_torch_equal_or_newer("2.9.0.dev")`` holds; otherwise an empty
        dict, so no option is overridden.
    """
    # Backend comparison stays first so the version probe is only evaluated
    # for the inductor backend.
    needs_override = current_backend == "inductor" and is_torch_equal_or_newer(
        "2.9.0.dev"
    )
    return {"graph_partition": False} if needs_override else {}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user