diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py index ae072a47e7f9..fd8d1fd7ff48 100644 --- a/tests/v1/engine/test_engine_core_client.py +++ b/tests/v1/engine/test_engine_core_client.py @@ -1,12 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio +import os +import signal import time import uuid from threading import Thread from typing import Optional -import psutil import pytest from transformers import AutoTokenizer @@ -17,8 +18,8 @@ from vllm.platforms import current_platform from vllm.usage.usage_lib import UsageContext from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.core import EngineCore -from vllm.v1.engine.core_client import (AsyncMPClient, EngineCoreClient, - SyncMPClient) +from vllm.v1.engine.core_client import (AsyncMPClient, CoreEngine, + EngineCoreClient, SyncMPClient) from vllm.v1.executor.abstract import Executor from ...distributed.conftest import MockSubscriber @@ -337,34 +338,40 @@ def test_kv_cache_events( "Token ids should be the same as the custom tokens") finally: client.shutdown() - return -@pytest.mark.timeout(10) +@pytest.mark.timeout(20) def test_startup_failure(monkeypatch: pytest.MonkeyPatch): with monkeypatch.context() as m, pytest.raises(Exception) as e_info: m.setenv("VLLM_USE_V1", "1") + # Monkey-patch to extract core process pid while it's starting. + core_proc_pid = [None] + ce_ctor = CoreEngine.__init__ + + def patched_ce_ctor(self, *args, **kwargs): + ce_ctor(self, *args, **kwargs) + core_proc_pid[0] = self.proc_handle.proc.pid + + m.setattr(CoreEngine, "__init__", patched_ce_ctor) + + t = time.time() engine_args = EngineArgs(model=MODEL_NAME) vllm_config = engine_args.create_engine_config( usage_context=UsageContext.UNKNOWN_CONTEXT) executor_class = Executor.get_class(vllm_config) + print(f"VllmConfig creation took {time.time() - t:.2f} seconds.") # Start another thread to wait for engine core process to start # and kill it - simulate fatal uncaught process exit. - this_proc = psutil.Process() - children_before = set(this_proc.children()) def kill_first_child(): - while True: + while (child_pid := core_proc_pid[0]) is None: time.sleep(0.5) - children = set(this_proc.children()) - children_before - if children: - child = children.pop() - print("Killing child core process", child.pid) - child.kill() - break + print(f"Killing child core process {child_pid}") + assert isinstance(child_pid, int) + os.kill(child_pid, signal.SIGKILL) Thread(target=kill_first_child, daemon=True).start()