[BugFix] Increase timeout for startup failure test (#17642)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-12-11 17:44:58 +08:00 · 2025-05-05 13:53:19 -07:00 · 2025-05-05 13:53:19 -07:00 · 5ea5c514da
commit 5ea5c514da
parent d3efde8176
1 changed files with 21 additions and 14 deletions
--- a/tests/v1/engine/test_engine_core_client.py
+++ b/tests/v1/engine/test_engine_core_client.py
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 import asyncio
 import os
 import signal
 import time
 import uuid
 from threading import Thread
 from typing import Optional
 import psutil
 import pytest
 from transformers import AutoTokenizer
@@ -17,8 +18,8 @@ from vllm.platforms import current_platform
 from vllm.usage.usage_lib import UsageContext
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core import EngineCore
-from vllm.v1.engine.core_client import (AsyncMPClient, EngineCoreClient,
+from vllm.v1.engine.core_client import (AsyncMPClient, CoreEngine,
-                                        SyncMPClient)
+                                        EngineCoreClient, SyncMPClient)
 from vllm.v1.executor.abstract import Executor
 from ...distributed.conftest import MockSubscriber
@@ -337,34 +338,40 @@ def test_kv_cache_events(
                "Token ids should be the same as the custom tokens")
        finally:
            client.shutdown()
        return
-@pytest.mark.timeout(10)
+@pytest.mark.timeout(20)
 def test_startup_failure(monkeypatch: pytest.MonkeyPatch):
    with monkeypatch.context() as m, pytest.raises(Exception) as e_info:
        m.setenv("VLLM_USE_V1", "1")
        # Monkey-patch to extract core process pid while it's starting.
        core_proc_pid = [None]
        ce_ctor = CoreEngine.__init__
        def patched_ce_ctor(self, *args, **kwargs):
            ce_ctor(self, *args, **kwargs)
            core_proc_pid[0] = self.proc_handle.proc.pid
        m.setattr(CoreEngine, "__init__", patched_ce_ctor)
        t = time.time()
        engine_args = EngineArgs(model=MODEL_NAME)
        vllm_config = engine_args.create_engine_config(
            usage_context=UsageContext.UNKNOWN_CONTEXT)
        executor_class = Executor.get_class(vllm_config)
        print(f"VllmConfig creation took {time.time() - t:.2f} seconds.")
        # Start another thread to wait for engine core process to start
        # and kill it - simulate fatal uncaught process exit.
        this_proc = psutil.Process()
        children_before = set(this_proc.children())
        def kill_first_child():
-            while True:
+            while (child_pid := core_proc_pid[0]) is None:
                time.sleep(0.5)
-                children = set(this_proc.children()) - children_before
+            print(f"Killing child core process {child_pid}")
-                if children:
+            assert isinstance(child_pid, int)
-                    child = children.pop()
+            os.kill(child_pid, signal.SIGKILL)
                    print("Killing child core process", child.pid)
                    child.kill()
                    break
        Thread(target=kill_first_child, daemon=True).start()