[platform] Support pluggable PyTorch custom ops (#11328)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-04-30 20:56:37 +08:00 · 2025-01-10 18:02:38 +08:00 · 2025-01-10 18:02:38 +08:00 · ef725feafc
commit ef725feafc
parent d907be7dc7
2 changed files with 11 additions and 0 deletions
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@ -57,6 +57,11 @@ class CustomOp(nn.Module):
        # PyTorch-native implementation.
        return self.forward_native(*args, **kwargs)

+    def forward_oot(self, *args, **kwargs):
+        # By default, we assume that ops on out-of-tree (OOT) platforms are
+        # compatible with the PyTorch-native implementation.
+        return self.forward_native(*args, **kwargs)
+
    def dispatch_forward(self):
        # NOTE(woosuk): Here we assume that vLLM was built for only one
        # specific backend. Currently, we do not support dynamic dispatching.
@ -81,6 +86,8 @@ class CustomOp(nn.Module):
            return self.forward_tpu
        elif current_platform.is_xpu():
            return self.forward_xpu
+        elif current_platform.is_out_of_tree():
+            return self.forward_oot
        else:
            return self.forward_cuda

--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@ -45,6 +45,7 @@ class PlatformEnum(enum.Enum):
    CPU = enum.auto()
    NEURON = enum.auto()
    OPENVINO = enum.auto()
+    OOT = enum.auto()
    UNSPECIFIED = enum.auto()


@ -107,6 +108,9 @@ class Platform:
    def is_openvino(self) -> bool:
        return self._enum == PlatformEnum.OPENVINO

+    def is_out_of_tree(self) -> bool:
+        return self._enum == PlatformEnum.OOT
+
    def is_cuda_alike(self) -> bool:
        """Stateless version of :func:`torch.cuda.is_available`."""
        return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)