[PD] let p2p nccl toy proxy handle /chat/completions (#21734)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey 2025-07-28 19:45:50 +08:00 committed by GitHub
parent 0ae970ed15
commit 63fe3a700f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
@app.route("/v1/completions", methods=["POST"])
+@app.route("/v1/chat/completions", methods=["POST"])
async def handle_request():
try:
original_request_data = await request.get_json()
@@ -157,13 +158,13 @@ async def handle_request():
# finish prefill
async for _ in forward_request(
-f"http://{prefill_addr}/v1/completions", prefill_request, request_id
+f"http://{prefill_addr}{request.path}", prefill_request, request_id
):
continue
# return decode
generator = forward_request(
-f"http://{decode_addr}/v1/completions", original_request_data, request_id
+f"http://{decode_addr}{request.path}", original_request_data, request_id
)
response = await make_response(generator)
response.timeout = None