[Bugfix] Apply same sampling parameters for both n=1 and n>1 (#26005)

Signed-off-by: Kenichi Maehashi <maehashi@preferred.jp>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Kenichi Maehashi 2025-10-01 23:37:35 +09:00 committed by yewentao256
parent aeff0604bb
commit 0944358a90

View File

@@ -290,7 +290,7 @@ class AsyncLLM(EngineClient):
return queue
# Fan out child requests (for n>1).
-parent_request = ParentRequest(request_id, params)
+parent_request = ParentRequest(request_id, request.sampling_params)
for idx in range(params.n):
request_id, params = parent_request.get_child_info(idx)
child_request = request if idx == params.n - 1 else copy(request)