[Bugfix] Backport request id validation to v0 (#11036)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
2026-07-17 19:07:12 +08:00 · 2024-12-10 09:38:23 -07:00 · 2024-12-10 09:38:23 -07:00 · 9b9cef3145
commit 9b9cef3145
parent d05f88679b
2 changed files with 5 additions and 1 deletion
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -576,6 +576,10 @@ class MQLLMEngineClient(EngineClient):
        if self._errored_with is not None:
            raise ENGINE_DEAD_ERROR(self._errored_with)

+        # Ensure the request id is unique among running requests
+        if request_id in self.output_queues:
+            raise ValueError(f"Request {request_id} already exists")
+
        # Constructing guided decoding logits processors is expensive, so we do
        # it here to avoid contending with cpu resources and the GIL on the
        # backend process.
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -152,7 +152,7 @@ class AsyncLLM(EngineClient):
        """Add new request to the AsyncLLM."""

        if self.detokenizer.is_request_active(request_id):
-            raise KeyError(f"Request {request_id} already exists.")
+            raise ValueError(f"Request {request_id} already exists.")

        # 1) Create a new AsyncStream for the request.
        stream = self._add_request_to_streams(request_id)