mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-03 07:17:03 +08:00
fix comments
Signed-off-by: inkcherry <mingzhi.liu@amd.com>
This commit is contained in:
parent
bba01338ca
commit
0b0c33d59e
@ -195,8 +195,9 @@ def example_round_robin_dp_loader(request_number, dp_size):
|
||||
@app.route("/v1/chat/completions", methods=["POST"])
|
||||
async def handle_request():
|
||||
try:
|
||||
global request_nums
|
||||
request_nums += 1
|
||||
with _list_lock:
|
||||
global request_nums
|
||||
request_nums += 1
|
||||
|
||||
def extract_ip_port_fast(url):
|
||||
match = IP_PORT_PATTERN.search(url)
|
||||
@ -210,6 +211,10 @@ async def handle_request():
|
||||
prefill_instance_endpoint = None
|
||||
decode_instance_endpoint = None
|
||||
|
||||
if not prefill_instances or not decode_instances:
|
||||
return await make_response(
|
||||
("Service Unavailable: No prefill or decode instances are registered.",
|
||||
503))
|
||||
pid = request_nums % len(prefill_instances)
|
||||
did = request_nums % len(decode_instances)
|
||||
prefill_instance_endpoint = prefill_instances[pid]
|
||||
@ -291,8 +296,11 @@ async def handle_request():
|
||||
response = await make_response(stream_generator)
|
||||
return response
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
||||
logger.exception("An error occurred while handling the request: %s", e)
|
||||
return await make_response((
|
||||
f"Internal Server Error: {e!s}",
|
||||
500,
|
||||
))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -71,7 +71,7 @@ class MoRIIOConstants:
|
||||
COMPLETION_PREFIX = "cmpl"
|
||||
|
||||
PING_INTERVAL = 5
|
||||
MAX_PING_RETRIES = 1000000
|
||||
MAX_PING_RETRIES = 100
|
||||
DEFAULT_HANDSHAKE_PORT = "6301"
|
||||
DEFAULT_NOTIFY_PORT = "61005"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user