1. In connect_parser, make --prefill-addr and --decode-addr required arguments.

2. To more accurately reflect its purpose, rename connect.py to disagg_connector.py.

Signed-off-by: clark <panf2333@gmail.com>
This commit is contained in:
clark 2025-01-08 12:22:35 +08:00
parent bfde1688e7
commit 6e1fba8a73

View File

@ -1,3 +1,4 @@
# SPDX-License-Identifier: Apache-2.0
import json
import uvicorn
import zmq
@ -86,7 +87,7 @@ async def chat_completions(request: Request):
prefill_request['max_tokens'] = 1
route = "/v1/completions"
# finish prefill
async for x in execute_task_async(route, header, prefill_request, app.state.sockets_prefill):
async for _ in execute_task_async(route, header, prefill_request, app.state.sockets_prefill):
continue
# return decode
@ -101,16 +102,14 @@ async def chat_completions(request: Request):
logger.error("".join(traceback.format_exception(*exc_info)))
async def run_connect(args, **uvicorn_kwargs) -> None:
logger.info("vLLM Connect start %s", args)
logger.info(f"start connect {args} {uvicorn_kwargs}")
async def run_disagg_connector(args, **uvicorn_kwargs) -> None:
logger.info(f"vLLM Disaggregate Connector start {args} {uvicorn_kwargs}")
logger.info(args.prefill_addr)
app.state.prefill_addr = f"tcp://{args.prefill_addr}" if args.prefill_addr is not None else url_prefill
app.state.decode_addr = f"tcp://{args.decode_addr}" if args.decode_addr is not None else url_decode
logger.info(f"start connect url_prefill: {app.state.prefill_addr} url_decode: {app.state.decode_addr}")
def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing
raise KeyboardInterrupt("terminated")