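[WIP][Test] Run CUDAGraphWrapper test on an explicit CUDA stream

Create a dedicated torch.cuda.Stream() in the test and enter it with
torch.cuda.stream(stream) for both the global warmup and the capture
phase, replacing the graph_capture(device=torch.device("cuda")) context.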

Signed-off-by: Amir Samani <asamani@nvidia.com>
2026-06-01 01:37:04 +08:00 · 2025-11-24 13:48:31 -08:00 · 2025-11-24 13:48:31 -08:00 · dfb5ce44dc
commit dfb5ce44dc
parent 695d78c471
1 changed files with 3 additions and 3 deletions
--- a/tests/v1/cudagraph/test_cudagraph_dispatch.py
+++ b/tests/v1/cudagraph/test_cudagraph_dispatch.py
@@ -166,7 +166,7 @@ class TestCUDAGraphWrapper:
            self.model, self.vllm_config, runtime_mode=CUDAGraphMode.FULL
        )
        batch_descriptor = BatchDescriptor(num_tokens=10)
-
+        stream = torch.cuda.Stream()
        # 0. global warmup
        with (
            set_forward_context(
@@ -175,7 +175,7 @@ class TestCUDAGraphWrapper:
                cudagraph_runtime_mode=CUDAGraphMode.NONE,
                batch_descriptor=None,
            ),
-            graph_capture(device=torch.device("cuda")) as graph_ctx,
+            torch.cuda.stream(stream),
        ):
            wrapper(self.input_tensor)
@@ -188,7 +188,7 @@ class TestCUDAGraphWrapper:
                batch_descriptor=batch_descriptor,
            ),
            patch("torch.cuda.graph", wraps=torch.cuda.graph) as mock_cuda_graph,
-            graph_capture(device=torch.device("cuda")) as graph_ctx,
+            torch.cuda.stream(stream),
        ):
            output1 = wrapper(self.input_tensor)
            # capturing phase should generate a zero output