diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index df60cb130a1b3..a4ff01feb2a72 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -78,7 +78,6 @@ class EagleProposer:
         self.use_cuda_graph = (self.vllm_config.compilation_config.level
                                == CompilationLevel.PIECEWISE
                                and not self.vllm_config.model_config.enforce_eager)
-
         self.cudagraph_runtime_mode = (CUDAGraphMode.PIECEWISE if
                                        self.use_cuda_graph else CUDAGraphMode.NONE)
 
@@ -678,12 +677,14 @@ class EagleProposer:
     def dummy_run(
         self,
         num_tokens: int,
+        use_cudagraphs=True,
     ) -> None:
         with set_forward_context(
                 None,
                 self.vllm_config,
                 num_tokens=num_tokens,
-                cudagraph_runtime_mode=self.cudagraph_runtime_mode,
+                cudagraph_runtime_mode=self.cudagraph_runtime_mode \
+                if use_cudagraphs else CUDAGraphMode.NONE,
         ):
             if self.is_multimodal_model:
                 input_ids = None
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f256dc160a6b5..f32a9d9a610cd 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2997,7 +2997,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 
         if self.speculative_config and self.speculative_config.use_eagle():
             assert isinstance(self.drafter, EagleProposer)
-            self.drafter.dummy_run(num_tokens)
+            # For warmup runs don't use cudagraphs in drafter
+            self.drafter.dummy_run(num_tokens, use_cudagraphs=False)
 
         # This is necessary to avoid blocking DP.
         # For dummy runs, we typically skip EPLB since we don't have any real