From 63fe3a700fd23aa7b2cb43cbd1cb7af960832603 Mon Sep 17 00:00:00 2001
From: Chauncey
Date: Mon, 28 Jul 2025 19:45:50 +0800
Subject: [PATCH] [PD] let p2p nccl toy proxy handle /chat/completions (#21734)

Signed-off-by: chaunceyjiang
---
 .../disagg_proxy_p2p_nccl_xpyd.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
index ec58a183061e..a6fd92feb2f1 100644
--- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
+++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
@@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
 
 
 @app.route("/v1/completions", methods=["POST"])
+@app.route("/v1/chat/completions", methods=["POST"])
 async def handle_request():
     try:
         original_request_data = await request.get_json()
@@ -157,13 +158,13 @@ async def handle_request():
 
         # finish prefill
         async for _ in forward_request(
-            f"http://{prefill_addr}/v1/completions", prefill_request, request_id
+            f"http://{prefill_addr}{request.path}", prefill_request, request_id
         ):
             continue
 
         # return decode
         generator = forward_request(
-            f"http://{decode_addr}/v1/completions", original_request_data, request_id
+            f"http://{decode_addr}{request.path}", original_request_data, request_id
         )
         response = await make_response(generator)
         response.timeout = None