From 93d2be10b6a0900b176a3ad5612fd89c80f1651c Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Wed, 1 Oct 2025 19:31:39 -0700
Subject: [PATCH] [Misc] Make handling of SamplingParams clearer in n>1 case (#26032)

Signed-off-by: Nick Hill
Signed-off-by: yewentao256
---
 vllm/v1/engine/async_llm.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 574f8b19c3790..36d0d50bf23db 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -289,13 +289,19 @@ class AsyncLLM(EngineClient):
             await self._add_request(request, prompt_str, None, 0, queue)
             return queue
 
+        # Get the updated SamplingParams from the request, which
+        # were cloned/updated in processor.process_inputs above.
+        parent_params = request.sampling_params
+        assert parent_params is not None
+
         # Fan out child requests (for n>1).
-        parent_request = ParentRequest(request_id, request.sampling_params)
-        for idx in range(params.n):
-            request_id, params = parent_request.get_child_info(idx)
-            child_request = request if idx == params.n - 1 else copy(request)
+        parent_request = ParentRequest(request_id, parent_params)
+        for idx in range(parent_params.n):
+            request_id, child_params = parent_request.get_child_info(idx)
+            child_request = request if idx == parent_params.n - 1 else copy(
+                request)
             child_request.request_id = request_id
-            child_request.sampling_params = params
+            child_request.sampling_params = child_params
             await self._add_request(child_request, prompt_str,
                                     parent_request, idx, queue)
         return queue