[Minor] Avoid registering a new custom op; just import silly_attention (#28578)

Signed-off-by: Boyuan Feng <boyuan@meta.com>
2025-12-10 06:35:00 +08:00 · 2025-11-14 01:32:31 -08:00 · 2025-11-14 01:32:31 -08:00 · fd75d3e8c0
commit fd75d3e8c0
parent c9a3a02149
1 changed files with 3 additions and 9 deletions
--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@ -15,6 +15,9 @@ from vllm.engine.arg_utils import EngineArgs
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import _is_torch_equal_or_newer
 # This import automatically registers `torch.ops.silly.attention`
 from . import silly_attention  # noqa: F401
 def test_version():
    # Test the version comparison logic using the private function
@ -257,15 +260,6 @@ def test_should_split():
    splitting_ops = ["aten::add.Tensor"]
    assert not should_split(node, splitting_ops)
    @torch.library.custom_op(
        "silly::attention",
        mutates_args=["out"],
    )
    def attention(
        q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, out: torch.Tensor
    ) -> None:
        out.copy_(q + k + v)
    q, k, v, out = [torch.randn(1)] * 4
    # supports custom ops as OpOverloadPacket