mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 06:15:00 +08:00
[PD] let p2p nccl toy proxy handle /chat/completions (#21734)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
0ae970ed15
commit
63fe3a700f
@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
|
|||||||
|
|
||||||
|
|
||||||
@app.route("/v1/completions", methods=["POST"])
|
@app.route("/v1/completions", methods=["POST"])
|
||||||
|
@app.route("/v1/chat/completions", methods=["POST"])
|
||||||
async def handle_request():
|
async def handle_request():
|
||||||
try:
|
try:
|
||||||
original_request_data = await request.get_json()
|
original_request_data = await request.get_json()
|
||||||
@ -157,13 +158,13 @@ async def handle_request():
|
|||||||
|
|
||||||
# finish prefill
|
# finish prefill
|
||||||
async for _ in forward_request(
|
async for _ in forward_request(
|
||||||
f"http://{prefill_addr}/v1/completions", prefill_request, request_id
|
f"http://{prefill_addr}{request.path}", prefill_request, request_id
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# return decode
|
# return decode
|
||||||
generator = forward_request(
|
generator = forward_request(
|
||||||
f"http://{decode_addr}/v1/completions", original_request_data, request_id
|
f"http://{decode_addr}{request.path}", original_request_data, request_id
|
||||||
)
|
)
|
||||||
response = await make_response(generator)
|
response = await make_response(generator)
|
||||||
response.timeout = None
|
response.timeout = None
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user