mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 08:55:46 +08:00
[PD] let p2p nccl toy proxy handle /chat/completions (#21734)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
0ae970ed15
commit
63fe3a700f
@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
|
||||
|
||||
|
||||
@app.route("/v1/completions", methods=["POST"])
|
||||
@app.route("/v1/chat/completions", methods=["POST"])
|
||||
async def handle_request():
|
||||
try:
|
||||
original_request_data = await request.get_json()
|
||||
@ -157,13 +158,13 @@ async def handle_request():
|
||||
|
||||
# finish prefill
|
||||
async for _ in forward_request(
|
||||
f"http://{prefill_addr}/v1/completions", prefill_request, request_id
|
||||
f"http://{prefill_addr}{request.path}", prefill_request, request_id
|
||||
):
|
||||
continue
|
||||
|
||||
# return decode
|
||||
generator = forward_request(
|
||||
f"http://{decode_addr}/v1/completions", original_request_data, request_id
|
||||
f"http://{decode_addr}{request.path}", original_request_data, request_id
|
||||
)
|
||||
response = await make_response(generator)
|
||||
response.timeout = None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user