[BugFix] Fix spec decode + structured outputs + preemption edge case (#30916)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-12-24 08:15:02 +08:00 · 2025-12-18 12:59:55 -08:00 · 2025-12-18 12:59:55 -08:00 · b0b77c4655
commit b0b77c4655
parent 634a14bd7d
1 changed file with 5 additions and 1 deletion
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -3393,9 +3393,13 @@ class GPUModelRunner(
        return async_output

    def take_draft_token_ids(self) -> DraftTokenIds | None:
-        if self._draft_token_ids is None:
+        if not self.num_spec_tokens:
            return None
+
        req_ids = self.input_batch.req_ids
+        if self._draft_token_ids is None:
+            return DraftTokenIds(req_ids, [[] for _ in req_ids])
+
        if isinstance(self._draft_token_ids, torch.Tensor):
            draft_token_ids = self._draft_token_ids.tolist()
        else: