[Minor] avoid registering a new custom op and just import silly_attn (#28578)

Signed-off-by: Boyuan Feng <boyuan@meta.com>
Author: Boyuan Feng, 2025-11-14 01:32:31 -08:00 (committed by GitHub)
parent c9a3a02149
commit fd75d3e8c0


@@ -15,6 +15,9 @@ from vllm.engine.arg_utils import EngineArgs
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import _is_torch_equal_or_newer
+
+# This import automatically registers `torch.ops.silly.attention`
+from . import silly_attention  # noqa: F401
 
 
 def test_version():
     # Test the version comparison logic using the private function
@@ -257,15 +260,6 @@ def test_should_split():
     splitting_ops = ["aten::add.Tensor"]
     assert not should_split(node, splitting_ops)
 
-    @torch.library.custom_op(
-        "silly::attention",
-        mutates_args=["out"],
-    )
-    def attention(
-        q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, out: torch.Tensor
-    ) -> None:
-        out.copy_(q + k + v)
-
     q, k, v, out = [torch.randn(1)] * 4
     # supports custom ops as OpOverloadPacket
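
For context, the `silly_attention` module that the test now imports performs the same registration that this diff deletes from the test body. Below is a minimal sketch of what such a module might contain, assuming the removed registration code carries over unchanged; the fake-kernel registration is an added assumption (commonly needed so the op also traces under torch.compile) and is not shown in this diff.

import torch


# Registering at module scope means a single `import silly_attention`
# makes `torch.ops.silly.attention` available process-wide.
@torch.library.custom_op("silly::attention", mutates_args=["out"])
def attention(
    q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, out: torch.Tensor
) -> None:
    # Toy "attention": writes q + k + v into the preallocated output.
    out.copy_(q + k + v)


@attention.register_fake
def _(q, k, v, out) -> None:
    # The op mutates `out` in place and returns nothing, so the fake
    # (meta) kernel used during tracing has nothing to compute.
    return None

After a single import, the op is reachable as `torch.ops.silly.attention` (an OpOverloadPacket) or `torch.ops.silly.attention.default` (the concrete OpOverload), which is what the `should_split` checks above exercise.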