mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 05:05:01 +08:00
[Bugfix] Register reducer even if transformers_modules not available (#19510)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
This commit is contained in:
parent
8d775dd30a
commit
8d1096e7db
57
tests/config/test_mp_reducer.py
Normal file
57
tests/config/test_mp_reducer.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import sys
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from vllm.config import VllmConfig
|
||||||
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
|
from vllm.v1.engine.async_llm import AsyncLLM
|
||||||
|
|
||||||
|
|
||||||
|
def test_mp_reducer(monkeypatch):
    """
    Test that _reduce_config reducer is registered when AsyncLLM is instantiated
    without transformers_modules. This is a regression test for
    https://github.com/vllm-project/vllm/pull/18640.
    """

    # Use V1 AsyncLLM which calls maybe_register_config_serialize_by_value
    monkeypatch.setenv('VLLM_USE_V1', '1')

    # Ensure transformers_modules is not in sys.modules. Going through
    # monkeypatch (instead of a bare `del`) restores the entry at teardown,
    # so this test does not leak global interpreter state into other tests.
    monkeypatch.delitem(sys.modules, 'transformers_modules', raising=False)

    with patch('multiprocessing.reducer.register') as mock_register:
        engine_args = AsyncEngineArgs(
            model="facebook/opt-125m",
            max_model_len=32,
            gpu_memory_utilization=0.1,
            disable_log_stats=True,
            disable_log_requests=True,
        )

        async_llm = AsyncLLM.from_engine_args(
            engine_args,
            start_engine_loop=False,
        )

        # Always shut the engine down, even when an assertion below fails;
        # otherwise a failing run would leave the engine alive.
        try:
            assert mock_register.called, (
                "multiprocessing.reducer.register should have been called")

            # Collect the reducer (second positional argument) of every
            # recorded register(type, reducer) call made for VllmConfig.
            vllm_config_reducers = [
                call_args[0][1] for call_args in mock_register.call_args_list
                if len(call_args[0]) >= 2 and call_args[0][0] == VllmConfig
            ]

            # Verify that a reducer for VllmConfig was registered
            assert vllm_config_reducers, (
                "VllmConfig should have been registered to multiprocessing.reducer"
            )
            assert callable(
                vllm_config_reducers[0]), "Reducer function should be callable"
        finally:
            async_llm.shutdown()
|
||||||
@ -655,12 +655,28 @@ def maybe_register_config_serialize_by_value() -> None:
|
|||||||
""" # noqa
|
""" # noqa
|
||||||
try:
|
try:
|
||||||
import transformers_modules
|
import transformers_modules
|
||||||
|
transformers_modules_available = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# the config does not need trust_remote_code
|
transformers_modules_available = False
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
import multiprocessing
|
||||||
|
import pickle
|
||||||
|
|
||||||
import cloudpickle
|
import cloudpickle
|
||||||
|
|
||||||
|
from vllm.config import VllmConfig
|
||||||
|
|
||||||
|
# Register multiprocessing reducers to handle cross-process
|
||||||
|
# serialization of VllmConfig objects that may contain custom configs
|
||||||
|
# from transformers_modules
|
||||||
|
def _reduce_config(config: VllmConfig):
|
||||||
|
return (pickle.loads, (cloudpickle.dumps(config), ))
|
||||||
|
|
||||||
|
multiprocessing.reducer.register(VllmConfig, _reduce_config)
|
||||||
|
|
||||||
|
# Register transformers_modules with cloudpickle if available
|
||||||
|
if transformers_modules_available:
|
||||||
cloudpickle.register_pickle_by_value(transformers_modules)
|
cloudpickle.register_pickle_by_value(transformers_modules)
|
||||||
|
|
||||||
# ray vendors its own version of cloudpickle
|
# ray vendors its own version of cloudpickle
|
||||||
@ -668,21 +684,6 @@ def maybe_register_config_serialize_by_value() -> None:
|
|||||||
if ray:
|
if ray:
|
||||||
ray.cloudpickle.register_pickle_by_value(transformers_modules)
|
ray.cloudpickle.register_pickle_by_value(transformers_modules)
|
||||||
|
|
||||||
# multiprocessing uses pickle to serialize arguments when using spawn
|
|
||||||
# Here we get pickle to use cloudpickle to serialize config objects
|
|
||||||
# that contain instances of the custom config class to avoid
|
|
||||||
# serialization problems if the generated module (and model) has a `.`
|
|
||||||
# in its name
|
|
||||||
import multiprocessing
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
from vllm.config import VllmConfig
|
|
||||||
|
|
||||||
def _reduce_config(config: VllmConfig):
|
|
||||||
return (pickle.loads, (cloudpickle.dumps(config), ))
|
|
||||||
|
|
||||||
multiprocessing.reducer.register(VllmConfig, _reduce_config)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Unable to register remote classes used by"
|
"Unable to register remote classes used by"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user