diff --git a/benchmarks/disagg_benchmarks/zmq/test_request.py b/benchmarks/disagg_benchmarks/zmq/test_request.py index b881aca790526..aac4cf8310712 100644 --- a/benchmarks/disagg_benchmarks/zmq/test_request.py +++ b/benchmarks/disagg_benchmarks/zmq/test_request.py @@ -61,6 +61,7 @@ async def test_connect_completions(session): except aiohttp.ClientError as e: print(f"Error: {e}") + def is_json(data): try: json.loads(data) @@ -68,6 +69,7 @@ def is_json(data): except ValueError: return False + def extract_data(responseText): reply = "" if responseText == "": diff --git a/vllm/entrypoints/disagg_connector.py b/vllm/entrypoints/disagg_connector.py index a79b37658268e..843aa5ae9e649 100644 --- a/vllm/entrypoints/disagg_connector.py +++ b/vllm/entrypoints/disagg_connector.py @@ -113,6 +113,7 @@ async def generate_stream_response(fisrt_reply: str, async for _, reply in generator: yield reply + async def prefill(route: str, header: dict, original_request_data: dict): logger.info("start prefill") generator = execute_task_async(route, header, original_request_data, @@ -124,7 +125,8 @@ async def prefill(route: str, header: dict, original_request_data: dict): response.status_code = 500 return response return True - + + async def decode(route: str, header: dict, original_request_data: dict): logger.info("start decode") generator = execute_task_async(route, header, original_request_data,