mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-05 15:02:22 +08:00
fix test_simple_inductor_graph_partition (#26522)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
This commit is contained in:
parent
29255cfc3b
commit
b545a0b207
@ -143,10 +143,14 @@ def test_simple_piecewise_compile(use_inductor):
|
|||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("splitting_ops", [["silly.attention"], []])
|
@pytest.mark.parametrize("splitting_ops", [["silly.attention"], []])
|
||||||
def test_simple_inductor_graph_partition(splitting_ops):
|
def test_simple_inductor_graph_partition(splitting_ops, monkeypatch):
|
||||||
if not is_torch_equal_or_newer("2.9.0.dev"):
|
if not is_torch_equal_or_newer("2.9.0.dev"):
|
||||||
pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
|
pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
|
||||||
|
|
||||||
|
# disable compile cache so that we run separately for different splitting_ops
|
||||||
|
# and get the expected number of cudagraphs captured.
|
||||||
|
monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
|
||||||
|
|
||||||
_run_simple_model(
|
_run_simple_model(
|
||||||
# Inductor graph partition automatically resets splitting_ops to an empty list
|
# Inductor graph partition automatically resets splitting_ops to an empty list
|
||||||
splitting_ops=splitting_ops,
|
splitting_ops=splitting_ops,
|
||||||
|
|||||||
@ -332,7 +332,10 @@ class InductorAdaptor(CompilerInterface):
|
|||||||
nonlocal file_path
|
nonlocal file_path
|
||||||
compiled_fn = inductor_compiled_graph.current_callable
|
compiled_fn = inductor_compiled_graph.current_callable
|
||||||
file_path = compiled_fn.__code__.co_filename # noqa
|
file_path = compiled_fn.__code__.co_filename # noqa
|
||||||
if not file_path.startswith(self.base_cache_dir):
|
if (
|
||||||
|
not file_path.startswith(self.base_cache_dir)
|
||||||
|
and compiled_fn.__closure__ is not None
|
||||||
|
):
|
||||||
# hooked in the align_inputs_from_check_idxs function
|
# hooked in the align_inputs_from_check_idxs function
|
||||||
# in torch/_inductor/utils.py
|
# in torch/_inductor/utils.py
|
||||||
for cell in compiled_fn.__closure__:
|
for cell in compiled_fn.__closure__:
|
||||||
@ -359,7 +362,10 @@ class InductorAdaptor(CompilerInterface):
|
|||||||
nonlocal file_path
|
nonlocal file_path
|
||||||
compiled_fn = inductor_compiled_graph.current_callable
|
compiled_fn = inductor_compiled_graph.current_callable
|
||||||
file_path = compiled_fn.__code__.co_filename # noqa
|
file_path = compiled_fn.__code__.co_filename # noqa
|
||||||
if not file_path.startswith(self.base_cache_dir):
|
if (
|
||||||
|
not file_path.startswith(self.base_cache_dir)
|
||||||
|
and compiled_fn.__closure__ is not None
|
||||||
|
):
|
||||||
# hooked in the align_inputs_from_check_idxs function
|
# hooked in the align_inputs_from_check_idxs function
|
||||||
# in torch/_inductor/utils.py
|
# in torch/_inductor/utils.py
|
||||||
for cell in compiled_fn.__closure__:
|
for cell in compiled_fn.__closure__:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user