mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-26 11:47:07 +08:00
fix comments
Signed-off-by: inkcherry <mingzhi.liu@amd.com>
This commit is contained in:
parent
bba01338ca
commit
0b0c33d59e
@ -195,8 +195,9 @@ def example_round_robin_dp_loader(request_number, dp_size):
|
|||||||
@app.route("/v1/chat/completions", methods=["POST"])
|
@app.route("/v1/chat/completions", methods=["POST"])
|
||||||
async def handle_request():
|
async def handle_request():
|
||||||
try:
|
try:
|
||||||
global request_nums
|
with _list_lock:
|
||||||
request_nums += 1
|
global request_nums
|
||||||
|
request_nums += 1
|
||||||
|
|
||||||
def extract_ip_port_fast(url):
|
def extract_ip_port_fast(url):
|
||||||
match = IP_PORT_PATTERN.search(url)
|
match = IP_PORT_PATTERN.search(url)
|
||||||
@ -210,6 +211,10 @@ async def handle_request():
|
|||||||
prefill_instance_endpoint = None
|
prefill_instance_endpoint = None
|
||||||
decode_instance_endpoint = None
|
decode_instance_endpoint = None
|
||||||
|
|
||||||
|
if not prefill_instances or not decode_instances:
|
||||||
|
return await make_response(
|
||||||
|
("Service Unavailable: No prefill or decode instances are registered.",
|
||||||
|
503))
|
||||||
pid = request_nums % len(prefill_instances)
|
pid = request_nums % len(prefill_instances)
|
||||||
did = request_nums % len(decode_instances)
|
did = request_nums % len(decode_instances)
|
||||||
prefill_instance_endpoint = prefill_instances[pid]
|
prefill_instance_endpoint = prefill_instances[pid]
|
||||||
@ -291,8 +296,11 @@ async def handle_request():
|
|||||||
response = await make_response(stream_generator)
|
response = await make_response(stream_generator)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
logger.exception("An error occurred while handling the request: %s", e)
|
||||||
pass
|
return await make_response((
|
||||||
|
f"Internal Server Error: {e!s}",
|
||||||
|
500,
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -71,7 +71,7 @@ class MoRIIOConstants:
|
|||||||
COMPLETION_PREFIX = "cmpl"
|
COMPLETION_PREFIX = "cmpl"
|
||||||
|
|
||||||
PING_INTERVAL = 5
|
PING_INTERVAL = 5
|
||||||
MAX_PING_RETRIES = 1000000
|
MAX_PING_RETRIES = 100
|
||||||
DEFAULT_HANDSHAKE_PORT = "6301"
|
DEFAULT_HANDSHAKE_PORT = "6301"
|
||||||
DEFAULT_NOTIFY_PORT = "61005"
|
DEFAULT_NOTIFY_PORT = "61005"
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user