Skip to content

Commit fbbb3a7

Browse files
[Core][AMD] Propagate shutdown timeout to MultiprocExecutor (vllm-project#43154)
Signed-off-by: Ryan Rock <ryan.rock@amd.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent b3a5c41 commit fbbb3a7

5 files changed

Lines changed: 71 additions & 2 deletions

File tree

tests/v1/engine/test_core_engine_actor_manager.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,15 @@
88
from pathlib import Path
99
from types import SimpleNamespace
1010
from typing import Any
11+
from unittest.mock import Mock
1112

1213
import pytest
1314
import ray
1415
import zmq
1516

1617
from vllm.utils.network_utils import make_zmq_socket, split_zmq_path
1718
from vllm.v1.engine.core import EngineCoreActorMixin
19+
from vllm.v1.engine.core_client import BackgroundResources
1820
from vllm.v1.engine.utils import (
1921
CoreEngineActorManager,
2022
EngineZmqAddresses,
@@ -99,6 +101,17 @@ class _DummyExecutor:
99101
pass
100102

101103

def test_background_resources_passes_worker_shutdown_timeout(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that BackgroundResources forwards the worker shutdown timeout.

    Sets ``VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS`` in the environment, invokes
    the ``BackgroundResources`` cleanup callable, and asserts that the engine
    manager's ``shutdown`` was called exactly once with that value passed as
    the ``timeout`` keyword argument.
    """
    timeout = 7
    monkeypatch.setenv("VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS", str(timeout))
    # A Mock stands in for the engine manager so the shutdown call can be
    # inspected afterwards.
    engine_manager = Mock()
    resources = BackgroundResources(ctx=None, engine_manager=engine_manager)
    resources()
    engine_manager.shutdown.assert_called_once_with(timeout=timeout)


102115
def _make_vllm_config() -> SimpleNamespace:
103116
return SimpleNamespace(
104117
parallel_config=SimpleNamespace(

tests/v1/executor/test_executor.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from vllm.sampling_params import SamplingParams
1515
from vllm.v1.engine.async_llm import AsyncLLM
1616
from vllm.v1.engine.llm_engine import LLMEngine
17+
from vllm.v1.executor import multiproc_executor as multiproc_executor_module
1718
from vllm.v1.executor.abstract import Executor
1819
from vllm.v1.executor.multiproc_executor import MultiprocExecutor
1920
from vllm.v1.executor.uniproc_executor import (
@@ -43,6 +44,50 @@ def test_supports_async_scheduling_multiproc_executor():
4344
assert MultiprocExecutor.supports_async_scheduling() is True
4445

4546

47+
class _FakeClock:
48+
def __init__(self) -> None:
49+
self.now = 0.0
50+
51+
def time(self) -> float:
52+
return self.now
53+
54+
def sleep(self, seconds: float) -> None:
55+
self.now += seconds
56+
57+
class _FakeProcess:
    """Minimal process double whose liveness is driven by a fake clock.

    The process reports itself alive until the clock reaches ``exits_at``,
    and records whether ``terminate()`` was ever called.
    """

    def __init__(self, clock: _FakeClock, exits_at: float) -> None:
        self.clock = clock
        # Fake-clock timestamp from which the process is no longer alive.
        self.exits_at = exits_at
        self.terminate_called = False

    def is_alive(self) -> bool:
        """Return True while the fake clock is strictly before ``exits_at``."""
        return self.clock.time() < self.exits_at

    def terminate(self) -> None:
        """Record that termination was requested; liveness is not changed."""
        self.terminate_called = True


@pytest.mark.parametrize(
    ("timeout", "exits_at", "expected_terminate"),
    [
        pytest.param(6, 5, False, id="worker-exits-before-timeout"),
        pytest.param(6, 7, True, id="worker-exceeds-timeout"),
    ],
)
def test_multiproc_executor_worker_termination_timeout(
    monkeypatch, timeout, exits_at, expected_terminate
):
    """Check that worker termination honors the configured shutdown timeout.

    ``VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS`` is set to ``timeout`` and the
    ``time``/``sleep`` functions reached through the executor module are
    replaced by a fake clock. A fake process that stops being alive at
    ``exits_at`` is handed to ``_ensure_worker_termination``; the test then
    asserts whether ``terminate()`` was called on it.
    """
    monkeypatch.setenv("VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS", str(timeout))
    clock = _FakeClock()
    # Patch the clock functions on the ``time`` module object referenced by
    # the executor module, so waiting advances fake time only.
    monkeypatch.setattr(multiproc_executor_module.time, "time", clock.time)
    monkeypatch.setattr(multiproc_executor_module.time, "sleep", clock.sleep)
    # Create the executor without running __init__.
    # NOTE(review): presumably this avoids starting real workers — confirm.
    executor = MultiprocExecutor.__new__(MultiprocExecutor)
    proc = _FakeProcess(clock, exits_at=exits_at)
    executor._ensure_worker_termination([proc])
    assert proc.terminate_called is expected_terminate


4691
class CustomMultiprocExecutor(MultiprocExecutor):
4792
def collective_rpc(
4893
self,

vllm/envs.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,7 @@
203203
VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = True
204204
VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16
205205
VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300
206+
VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS: int = 5
206207
VLLM_KV_CACHE_LAYOUT: Literal["NHD", "HND"] | None = None
207208
VLLM_SSM_CONV_STATE_LAYOUT: Literal["SD", "DS"] | None = None
208209
VLLM_COMPUTE_NANS_IN_LOGITS: bool = False
@@ -1552,6 +1553,10 @@ def _resolve_rust_frontend_path() -> str | None:
15521553
"VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS": lambda: int(
15531554
os.getenv("VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS", "300")
15541555
),
1556+
# Timeout in seconds for engine and worker process shutdown
1557+
"VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS": lambda: int(
1558+
os.getenv("VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS", "5")
1559+
),
15551560
# KV Cache layout used throughout vllm.
15561561
# Some common values are:
15571562
# - NHD
@@ -1994,6 +1999,7 @@ def compile_factors() -> dict[str, object]:
19941999
"VLLM_ENGINE_ITERATION_TIMEOUT_S",
19952000
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE",
19962001
"VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS",
2002+
"VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS",
19972003
"VLLM_KEEP_ALIVE_ON_ENGINE_DEATH",
19982004
"VLLM_IMAGE_FETCH_TIMEOUT",
19992005
"VLLM_VIDEO_FETCH_TIMEOUT",

vllm/v1/engine/core_client.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import zmq
2121
import zmq.asyncio
2222

23+
from vllm import envs
2324
from vllm.config import VllmConfig
2425
from vllm.envs import VLLM_ENGINE_READY_TIMEOUT_S
2526
from vllm.logger import init_logger
@@ -394,7 +395,9 @@ def __call__(self):
394395
logger.debug_once("[shutdown] MPClient: background resource cleanup start")
395396
self.engine_dead = True
396397
if self.engine_manager is not None:
397-
self.engine_manager.shutdown()
398+
self.engine_manager.shutdown(
399+
timeout=envs.VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS
400+
)
398401
if self.coordinator is not None:
399402
self.coordinator.shutdown()
400403

vllm/v1/executor/multiproc_executor.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -429,7 +429,9 @@ def wait_for_termination(procs, timeout):
429429
"[shutdown] Executor: waiting for worker exit count=%d",
430430
initial_count,
431431
)
432-
if wait_for_termination(active_procs(), 4):
432+
if wait_for_termination(
433+
active_procs(), timeout=envs.VLLM_WORKER_SHUTDOWN_TIMEOUT_SECONDS
434+
):
433435
logger.info_once("[shutdown] Executor: all workers exited gracefully")
434436
return
435437

0 commit comments

Comments
 (0)