diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py index ec58a183061e..a6fd92feb2f1 100644 --- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py +++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py @@ -120,6 +120,7 @@ async def forward_request(url, data, request_id): @app.route("/v1/completions", methods=["POST"]) +@app.route("/v1/chat/completions", methods=["POST"]) async def handle_request(): try: original_request_data = await request.get_json() @@ -157,13 +158,13 @@ async def handle_request(): # finish prefill async for _ in forward_request( - f"http://{prefill_addr}/v1/completions", prefill_request, request_id + f"http://{prefill_addr}{request.path}", prefill_request, request_id ): continue # return decode generator = forward_request( - f"http://{decode_addr}/v1/completions", original_request_data, request_id + f"http://{decode_addr}{request.path}", original_request_data, request_id ) response = await make_response(generator) response.timeout = None