From 5c54d9759d3e12d66919826bf1b7c196914d3a92 Mon Sep 17 00:00:00 2001 From: Abirdcfly Date: Fri, 1 Aug 2025 21:08:45 +0800 Subject: [PATCH] [Bugfix][PD] set max_completion_tokens=1 if req has this value (#21841) Signed-off-by: Abirdcfly --- .../online_serving/disaggregated_serving/disagg_proxy_demo.py | 2 ++ .../disagg_proxy_p2p_nccl_xpyd.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py index 16c32dcaa5d31..d39edb0b9d15c 100644 --- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py +++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py @@ -293,6 +293,8 @@ class Proxy: # add params to request kv_prepare_request = request.copy() kv_prepare_request["max_tokens"] = 1 + if "max_completion_tokens" in kv_prepare_request: + kv_prepare_request["max_completion_tokens"] = 1 # prefill stage prefill_instance = self.schedule(self.prefill_cycler) diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py index a6fd92feb2f11..73da7af85f1d9 100644 --- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py +++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py @@ -128,6 +128,8 @@ async def handle_request(): prefill_request = original_request_data.copy() # change max_tokens = 1 to let it only do prefill prefill_request["max_tokens"] = 1 + if "max_completion_tokens" in prefill_request: + prefill_request["max_completion_tokens"] = 1 global count global prefill_instances