diff --git a/tests/compile/piecewise/test_multiple_graphs.py b/tests/compile/piecewise/test_multiple_graphs.py index d70dc8811db8d..1f46ca1f9cfe8 100644 --- a/tests/compile/piecewise/test_multiple_graphs.py +++ b/tests/compile/piecewise/test_multiple_graphs.py @@ -16,8 +16,7 @@ from vllm.config import (CompilationConfig, CompilationLevel, VllmConfig, from vllm.envs import VLLM_USE_V1 from vllm.forward_context import set_forward_context -# Import shared test operations -# The standard attention operation is automatically registered when imported +# This import automatically registers torch ops for testing (like silly.attention) import tests.compile.test_operations BATCH_SIZE = 32 @@ -320,9 +319,5 @@ def test_multi_graph_piecewise_compile_outputs_equal(): ): outputs.append(run_model(vllm_config, model, inputs)) - # Generally don't expect outputs with and without inductor - # to be bitwise equivalent - assert torch.allclose(outputs[0], outputs[1]) - # Expect bitwise equivalence using inductor w/ and w/o cudagraph assert torch.equal(outputs[0], outputs[2]) \ No newline at end of file diff --git a/tests/compile/piecewise/test_simple.py b/tests/compile/piecewise/test_simple.py index f235bf4a6fbbf..51bab00739da7 100644 --- a/tests/compile/piecewise/test_simple.py +++ b/tests/compile/piecewise/test_simple.py @@ -15,7 +15,7 @@ from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode, from vllm.envs import VLLM_USE_V1 from vllm.forward_context import BatchDescriptor, set_forward_context -# Import shared test operations +# This import also automatically registers torch ops for testing (like silly.attention) from tests.compile.test_operations import ( get_global_counter, reset_global_counter ) diff --git a/tests/compile/piecewise/test_toy_llama.py b/tests/compile/piecewise/test_toy_llama.py index b2889f03a8061..075877bb6652e 100644 --- a/tests/compile/piecewise/test_toy_llama.py +++ b/tests/compile/piecewise/test_toy_llama.py @@ -470,4 +470,4 @@ def benchmark(): if __name__ == "__main__": - benchmark() \ No newline at end of file + pass \ No newline at end of file diff --git a/tests/compile/test_operations.py b/tests/compile/test_operations.py index 32aa4108e6b2a..5b25cf58ab580 100644 --- a/tests/compile/test_operations.py +++ b/tests/compile/test_operations.py @@ -3,14 +3,7 @@ """ Shared PyTorch custom operations for compilation tests. -This module provides a centralized place to define and register custom -PyTorch operations used across multiple compilation tests. This avoids -duplicate operation registrations that would cause RuntimeErrors when -running tests together. - -The main "attention" operation is automatically registered when this module -is imported. Individual test files can access the global counter functionality -through helper functions. +Centralizes custom operation definitions to avoid duplicate registrations. """ import torch @@ -23,7 +16,7 @@ from vllm.utils import direct_register_custom_op silly_lib = Library("silly", "FRAGMENT") -# Global counter that all tests can use or ignore +# Global counter that counts the number of times attention is invoked _global_counter = 0