mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 01:35:01 +08:00)
[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton (#15099)
Signed-off-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
parent 5aa6efb9a5
commit 2f54045508
@@ -17,8 +17,14 @@ from torch.utils.benchmark import Measurement as TMeasurement

 from utils import ArgPool, Bench, CudaGraphBenchParams
 from weight_shapes import WEIGHT_SHAPES

-from vllm.lora.ops.triton_ops import LoRAKernelMeta, lora_expand, lora_shrink
-from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
+from vllm.triton_utils import HAS_TRITON
+
+if HAS_TRITON:
+    from vllm.lora.ops.triton_ops import (LoRAKernelMeta, lora_expand,
+                                          lora_shrink)
+    from vllm.lora.ops.triton_ops.utils import (_LORA_A_PTR_DICT,
+                                                _LORA_B_PTR_DICT)
+
 from vllm.utils import FlexibleArgumentParser

 DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
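The guard above only protects the import site; when Triton is missing, the LoRA kernel symbols are simply never defined in the benchmark module. A minimal, hypothetical sketch (not part of this diff) of how a script entry point could fail fast in that case:

from vllm.triton_utils import HAS_TRITON


def main() -> None:
    if not HAS_TRITON:
        # The guarded imports were skipped, so lora_expand/lora_shrink do not
        # exist in this module; exit with a clear message instead of a NameError.
        raise SystemExit("Triton is required to run this LoRA kernel benchmark.")
    ...  # rest of the benchmark


if __name__ == "__main__":
    main()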
@@ -10,8 +10,10 @@ from packaging import version

 from vllm import _custom_ops as ops
 from vllm.attention.backends.utils import PAD_SLOT_ID
+from vllm.triton_utils import HAS_TRITON

-TRITON3 = version.parse(triton.__version__) >= version.parse("3.0.0")
+TRITON3 = HAS_TRITON and (version.parse(triton.__version__)
+                          >= version.parse("3.0.0"))

 if TRITON3:
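Why the short-circuit matters: when Triton is absent, the placeholder module installed by vllm.triton_utils (see the importing.py hunk below) defines no __version__ attribute, so the version comparison may only run once HAS_TRITON is known to be true. A small stand-alone sketch of the same pattern, illustrative rather than taken verbatim from this file:

from packaging import version

# Importing vllm.triton_utils first registers the placeholder when Triton is
# missing, so the bare `import triton` below resolves to the placeholder on a
# machine without Triton instead of raising ImportError.
from vllm.triton_utils import HAS_TRITON

import triton  # either the real package or the placeholder module

# `and` short-circuits: triton.__version__ is only read when a real Triton
# installation is present; the placeholder has no such attribute.
TRITON3 = HAS_TRITON and (version.parse(triton.__version__)
                          >= version.parse("3.0.0"))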
@@ -2,4 +2,4 @@

 from vllm.triton_utils.importing import HAS_TRITON

-__all__ = ["HAS_TRITON"]
+__all__ = ["HAS_TRITON"]
@@ -1,17 +1,53 @@
 # SPDX-License-Identifier: Apache-2.0

+import sys
+import types
 from importlib.util import find_spec

 from vllm.logger import init_logger
-from vllm.platforms import current_platform

 logger = init_logger(__name__)

 HAS_TRITON = (
     find_spec("triton") is not None
-    and not current_platform.is_xpu()  # Not compatible
+    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
 )

 if not HAS_TRITON:
     logger.info("Triton not installed or not compatible; certain GPU-related"
                 " functions will not be available.")
+
+    class TritonPlaceholder(types.ModuleType):
+
+        def __init__(self):
+            super().__init__("triton")
+            self.jit = self._dummy_decorator("jit")
+            self.autotune = self._dummy_decorator("autotune")
+            self.heuristics = self._dummy_decorator("heuristics")
+            self.language = TritonLanguagePlaceholder()
+            logger.warning_once(
+                "Triton is not installed. Using dummy decorators. "
+                "Install it via `pip install triton` to enable kernel "
+                "compilation.")
+
+        def _dummy_decorator(self, name):
+
+            def decorator(func=None, **kwargs):
+                if func is None:
+                    return lambda f: f
+                return func
+
+            return decorator
+
+    class TritonLanguagePlaceholder(types.ModuleType):
+
+        def __init__(self):
+            super().__init__("triton.language")
+            self.constexpr = None
+            self.dtype = None
+
+    sys.modules['triton'] = TritonPlaceholder()
+    sys.modules['triton.language'] = TritonLanguagePlaceholder()
+
+if 'triton' in sys.modules:
+    logger.info("Triton module has been replaced with a placeholder.")
@@ -63,6 +63,9 @@ from torch.library import Library

 from typing_extensions import Never, ParamSpec, TypeIs, assert_never

 import vllm.envs as envs
+# NOTE: import triton_utils to make TritonPlaceholderModule work
+# if triton is unavailable
+import vllm.triton_utils  # noqa: F401
 from vllm.logger import enable_trace_function_call, init_logger

 if TYPE_CHECKING:
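The ordering in the hunk above is the important part: the placeholder has to be registered in sys.modules before any module executes a bare `import triton`, which is why the import sits near the top of this file. A small illustrative sketch of the effect (the print is just for inspection, not from the diff):

import vllm.triton_utils  # noqa: F401  (must run before any bare `import triton`)

import triton  # real Triton if installed, TritonPlaceholder otherwise

print(type(triton).__name__)  # 'module' with real Triton, 'TritonPlaceholder' without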