mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 07:37:03 +08:00
Address PR feedback: simplify comments, remove extra assertion, and improve docstrings
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
This commit is contained in:
parent
865b0bfafd
commit
91735e9c1c
@ -16,8 +16,7 @@ from vllm.config import (CompilationConfig, CompilationLevel, VllmConfig,
|
||||
from vllm.envs import VLLM_USE_V1
|
||||
from vllm.forward_context import set_forward_context
|
||||
|
||||
# Import shared test operations
|
||||
# The standard attention operation is automatically registered when imported
|
||||
# This import automatically registers torch ops for testing (like silly.attention)
|
||||
import tests.compile.test_operations
|
||||
|
||||
BATCH_SIZE = 32
|
||||
@ -320,9 +319,5 @@ def test_multi_graph_piecewise_compile_outputs_equal():
|
||||
):
|
||||
outputs.append(run_model(vllm_config, model, inputs))
|
||||
|
||||
# Generally don't expect outputs with and without inductor
|
||||
# to be bitwise equivalent
|
||||
assert torch.allclose(outputs[0], outputs[1])
|
||||
|
||||
# Expect bitwise equivalence using inductor w/ and w/o cudagraph
|
||||
assert torch.equal(outputs[0], outputs[2])
|
||||
@ -15,7 +15,7 @@ from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode,
|
||||
from vllm.envs import VLLM_USE_V1
|
||||
from vllm.forward_context import BatchDescriptor, set_forward_context
|
||||
|
||||
# Import shared test operations
|
||||
# This import also automatically registers torch ops for testing (like silly.attention)
|
||||
from tests.compile.test_operations import (
|
||||
get_global_counter, reset_global_counter
|
||||
)
|
||||
|
||||
@ -470,4 +470,4 @@ def benchmark():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
benchmark()
|
||||
pass
|
||||
@ -3,14 +3,7 @@
|
||||
"""
|
||||
Shared PyTorch custom operations for compilation tests.
|
||||
|
||||
This module provides a centralized place to define and register custom
|
||||
PyTorch operations used across multiple compilation tests. This avoids
|
||||
duplicate operation registrations that would cause RuntimeErrors when
|
||||
running tests together.
|
||||
|
||||
The main "attention" operation is automatically registered when this module
|
||||
is imported. Individual test files can access the global counter functionality
|
||||
through helper functions.
|
||||
Centralizes custom operation definitions to avoid duplicate registrations.
|
||||
"""
|
||||
|
||||
import torch
|
||||
@ -23,7 +16,7 @@ from vllm.utils import direct_register_custom_op
|
||||
silly_lib = Library("silly", "FRAGMENT")
|
||||
|
||||
|
||||
# Global counter that all tests can use or ignore
|
||||
# Global counter that counts the number of times attention is invoked
|
||||
_global_counter = 0
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user