# Mirror of https://git.datalinker.icu/vllm-project/vllm.git
# (synced 2026-04-24 07:37:04 +08:00; 71 lines, 2.9 KiB, Python)
import time
|
|
import zmq
|
|
import zmq.asyncio
|
|
import random
|
|
import asyncio
|
|
|
|
|
|
async def worker_routine(worker_url: str,
                         context: zmq.asyncio.Context = None, i: int = 0) -> None:
    """Worker routine.

    Connects a DEALER socket to ``worker_url`` and then loops forever:
    each request is expected to arrive as four frames
    ``(identity, url, headers, string)``, and is answered with three
    canned chat-completion chunks, each sent back as ``[identity, reply]``
    after a random 1-5 second pause.

    Args:
        worker_url: Endpoint of the dispatcher's backend socket.
        context: asyncio ZeroMQ context used to create the socket. When
            omitted, the process-wide ``zmq.asyncio.Context.instance()``
            is used (previously a missing context raised AttributeError).
            NOTE(review): an ``inproc://`` endpoint is only reachable from
            the context that bound it, so in-process callers should keep
            passing the dispatcher's context explicitly.
        i: Worker index, used in the socket identity and in log output.
    """
    if context is None:
        context = zmq.asyncio.Context.instance()

    # Socket to talk to dispatcher
    socket = context.socket(zmq.DEALER)
    socket.setsockopt(zmq.IDENTITY, f"worker-{i}-{time.time()}".encode())
    socket.connect(worker_url)
    print(f"worker-{i} {worker_url} started")

    # The canned stream is the same for every request, so build it once.
    streamreply = (
        '{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}',
        '{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}',
        '{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}',
    )

    try:
        while True:
            frames = await socket.recv_multipart()
            if len(frames) != 4:
                # A request with an unexpected shape must not kill the
                # worker (and, through gather(), the whole server).
                print(f"worker-{i} Dropped malformed request "
                      f"with {len(frames)} frame(s)")
                continue
            identity, url, headers, string = frames
            print(f"worker-{i} Received request identity: [{identity} ]")
            print(f"worker-{i} Received request url: [{url} ]")
            print(f"worker-{i} Received request headers: [{headers} ]")
            print(f"worker-{i} Received request string: [{string} ]")

            for j, chunk in enumerate(streamreply):
                # Do some 'work' without blocking the event loop
                await asyncio.sleep(random.randint(1, 5))
                # Send reply back to client. `string` is a bytes frame, so
                # its repr (b'...') is what ends up embedded in the reply.
                reply = f"worker-{i} reply part-{j} {string} \n {chunk}"
                print(f"worker-{i} Sent reply: [{identity} {reply} ]")
                await socket.send_multipart([identity, reply.encode()])
    finally:
        # Runs on cancellation or error: release the socket so that
        # terminating the context is not blocked by a leaked worker socket.
        socket.close(linger=0)
|
|
|
|
async def main() -> None:
    """Server routine.

    Binds a ROUTER socket for clients and a DEALER socket for workers,
    starts five ``worker_routine`` tasks on the shared context, and runs
    ``zmq.proxy`` between the two sockets in a helper thread until the
    server is interrupted or a ZeroMQ error occurs. All sockets and the
    context are released on the way out.
    """
    url_worker = "inproc://workers"
    # NOTE(review): libzmq documents bind() addresses as an interface
    # ("*", a numeric IP, or an interface name); confirm that "localhost"
    # is accepted on the target platform or switch to 127.0.0.1.
    url_client = "tcp://localhost:5556"

    # Prepare our context and sockets
    context = zmq.asyncio.Context()
    clients = context.socket(zmq.ROUTER)   # Socket to talk to clients
    workers = context.socket(zmq.DEALER)   # Socket to talk to workers
    tasks = []

    try:
        # Binding happens inside the try block so that a bind failure
        # still goes through the cleanup below.
        clients.bind(url_client)
        print("Server ROUTER started at", url_client)
        workers.bind(url_worker)
        print("Worker DEALER started at", url_worker)

        tasks = [asyncio.create_task(worker_routine(url_worker, context, i))
                 for i in range(5)]
        # zmq.proxy blocks, so it runs in a thread next to the workers.
        await asyncio.gather(*tasks,
                             asyncio.to_thread(zmq.proxy, clients, workers))
    except KeyboardInterrupt:
        print("Server interrupted")
    except asyncio.CancelledError:
        # Under asyncio.run() Ctrl+C reaches this coroutine as a
        # cancellation rather than as KeyboardInterrupt.
        print("Server interrupted")
        raise
    except zmq.ZMQError as e:
        print("ZMQError:", e)
    finally:
        # gather() does not stop the remaining workers when one awaitable
        # fails, so cancel them explicitly and wait for them to unwind.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

        clients.close(linger=0)
        workers.close(linger=0)
        # destroy() closes any socket still registered with the context
        # before terminating it; a plain term() would block for as long
        # as a worker socket stayed open.
        # NOTE(review): the proxy thread cannot be cancelled and may still
        # be using `clients`/`workers` at this point - confirm that closing
        # them here is acceptable for this example.
        context.destroy(linger=0)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the server coroutine and drive it to
    # completion on a fresh event loop.
    server = main()
    asyncio.run(server)