mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 19:27:07 +08:00
change log level and fix some comments
Signed-off-by: clark <panf2333@gmail.com>
This commit is contained in:
parent
d6945ecdf0
commit
0cb2e05256
@ -84,22 +84,22 @@ async def execute_task_async(route: str, headers: dict, request: dict,
|
||||
headersJson.encode(),
|
||||
requestBody.encode()]),
|
||||
timeout=time_out)
|
||||
logger.info("Sent end")
|
||||
logger.debug("Sent end")
|
||||
while True:
|
||||
logger.info("Waiting for reply")
|
||||
logger.debug("Waiting for reply")
|
||||
[contentType,
|
||||
reply] = await asyncio.wait_for(sock.recv_multipart(),
|
||||
timeout=time_out)
|
||||
contentType_str = contentType.decode()
|
||||
reply_str = reply.decode()
|
||||
logger.info("Received result: %s, %s", contentType_str, reply_str)
|
||||
logger.debug("Received result: %s, %s", contentType_str, reply_str)
|
||||
yield (contentType_str, reply_str)
|
||||
if context_type_json == contentType_str:
|
||||
logger.info("Received %s message, return socket",
|
||||
logger.debug("Received %s message, return socket",
|
||||
contentType_str)
|
||||
break
|
||||
if "[DONE]" in reply_str:
|
||||
logger.info("Received stop signal, return socket")
|
||||
logger.debug("Received stop signal, return socket")
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(traceback.format_exc())
|
||||
@ -122,7 +122,7 @@ async def prefill(route: str, header: dict, original_request_data: dict):
|
||||
generator = execute_task_async(route, header, original_request_data,
|
||||
app.state.sockets_prefill)
|
||||
async for contentType, reply in generator:
|
||||
logger.info("contentType: %s, reply: %s", contentType, reply)
|
||||
logger.debug("contentType: %s, reply: %s", contentType, reply)
|
||||
if context_type_error == contentType:
|
||||
response = JSONResponse({"error": reply})
|
||||
response.status_code = 500
|
||||
@ -136,7 +136,7 @@ async def decode(route: str, header: dict, original_request_data: dict):
|
||||
app.state.sockets_decode)
|
||||
|
||||
async for contentType, reply in generator:
|
||||
logger.info("contentType: %s, reply: %s", contentType, reply)
|
||||
logger.debug("contentType: %s, reply: %s", contentType, reply)
|
||||
if context_type_error == contentType:
|
||||
response = JSONResponse({"error": reply})
|
||||
response.status_code = 500
|
||||
@ -158,20 +158,21 @@ async def chat_completions(request: Request):
|
||||
if header.get("X-Request-Id") is None:
|
||||
logger.info("add X-Request-Id: %s", x_request_id)
|
||||
header["X-Request-Id"] = x_request_id
|
||||
original_request_data = await request.json()
|
||||
logger.info("Received request: %s header: %s", original_request_data,
|
||||
request_data = await request.json()
|
||||
logger.info("Received request: %s header: %s", request_data,
|
||||
header)
|
||||
prefill_request = original_request_data.copy()
|
||||
original_max_tokens = request_data['max_tokens']
|
||||
# Set max_tokens to 1 so that this request only performs the prefill step
|
||||
prefill_request['max_tokens'] = 1
|
||||
request_data['max_tokens'] = 1
|
||||
route = "/v1/completions"
|
||||
# finish prefill
|
||||
try:
|
||||
prefill_response = await prefill(route, header, prefill_request)
|
||||
prefill_response = await prefill(route, header, request_data)
|
||||
if isinstance(prefill_response, JSONResponse):
|
||||
return prefill_response
|
||||
logger.info("finish prefill start decode")
|
||||
response = await decode(route, header, original_request_data)
|
||||
request_data['max_tokens'] = original_max_tokens
|
||||
response = await decode(route, header, request_data)
|
||||
logger.info("finish decode")
|
||||
except Exception as e:
|
||||
logger.error("Error occurred in disagg prefill proxy server, %s",
|
||||
@ -231,6 +232,4 @@ if __name__ == "__main__":
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
uvloop.run(run_disagg_connector(args))
|
||||
|
||||
# uvicorn.run(app, host="0.0.0.0", port=fastapi_port)
|
||||
uvloop.run(run_disagg_connector(args))
|
||||
@ -97,8 +97,14 @@ async def serve_zmq(arg, zmq_server_port: int, app: FastAPI) -> None:
|
||||
# Thread-safe proxy: ThreadProxy creates its sockets in a background thread:
|
||||
# https://pyzmq.readthedocs.io/en/latest/api/zmq.devices.html#proxy-devices
|
||||
thread_proxy = zmq.devices.ThreadProxy(zmq.ROUTER, zmq.DEALER)
|
||||
# A high-water mark (HWM) of 0 means no limit on queued messages
|
||||
hwm_limit = 0
|
||||
thread_proxy.bind_in(clients_addr)
|
||||
thread_proxy.setsockopt_in(zmq.SNDHWM, hwm_limit)
|
||||
thread_proxy.setsockopt_in(zmq.RCVHWM, hwm_limit)
|
||||
thread_proxy.bind_out(workers_addr)
|
||||
thread_proxy.setsockopt_out(zmq.SNDHWM, hwm_limit)
|
||||
thread_proxy.setsockopt_out(zmq.RCVHWM, hwm_limit)
|
||||
thread_proxy.start()
|
||||
await asyncio.gather(*tasks)
|
||||
except KeyboardInterrupt:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user