# SPDX-License-Identifier: Apache-2.0
#
# Recovered from a git patch whose line breaks had been lost:
#   commit:  28d0396ff16e6c72b83d055b33c864702fc02ce7
#   From:    rshaw@neuralmagic.com
#   Date:    Sun, 23 Mar 2025 21:54:04 +0000
#   Subject: [PATCH] updated
#   Signed-off-by: rshaw@neuralmagic.com
#   Path:    vllm/entrypoints/disaggregated/worker.py (new file)
"""Command-line entry point that runs a ``PDWorker`` on an engine client."""

import uvloop

from vllm.disaggregated.pd_worker import PDWorker
from vllm.engine.async_llm_engine import AsyncEngineArgs
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.openai.api_server import build_async_engine_client
from vllm.logger import init_logger
from vllm.utils import FlexibleArgumentParser
from vllm.version import __version__ as VLLM_VERSION

logger = init_logger(__name__)


async def run(args, engine: EngineClient) -> None:
    """Run a ``PDWorker`` until its busy loop exits, then shut it down.

    Args:
        args: Parsed command-line arguments. Only ``worker_addr`` and
            ``client_addr`` are read here.
        engine: Engine client handed to the ``PDWorker`` constructor.

    Raises:
        Whatever ``PDWorker(...)`` or ``worker.run_busy_loop()`` raises.
    """
    # Construct the worker *before* entering the ``try`` block. If the
    # constructor raised inside the ``try``, ``worker`` would be unbound in
    # ``finally`` and the resulting UnboundLocalError would mask the real
    # construction failure.
    worker = PDWorker(engine, args.worker_addr, args.client_addr)
    try:
        await worker.run_busy_loop()
    finally:
        # Always release the worker, whether the loop returned or raised.
        worker.shutdown()


async def main(args) -> None:
    """Log startup information, build the engine client and run the worker.

    Args:
        args: Parsed command-line arguments. This function sets
            ``args.disable_frontend_multiprocessing`` to ``False`` before
            passing ``args`` to ``build_async_engine_client``.
    """
    logger.info("vLLM P/D Worker Server %s", VLLM_VERSION)
    logger.info("Args: %s", args)

    # The parser built below does not add this option itself, so it is set
    # explicitly here.
    # NOTE(review): presumably consumed by build_async_engine_client -- confirm.
    args.disable_frontend_multiprocessing = False
    async with build_async_engine_client(args) as engine:
        await run(args, engine)


if __name__ == "__main__":
    parser = FlexibleArgumentParser()
    parser.add_argument('--client-addr',
                        type=str,
                        required=True,
                        help='The address of the connector.')
    parser.add_argument('--worker-addr',
                        type=str,
                        required=True,
                        help='The address of the worker.')
    # Register the engine's own command-line options on the same parser.
    AsyncEngineArgs.add_cli_args(parser)
    uvloop.run(main(parser.parse_args()))