mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 12:35:01 +08:00
[Bugfix] Register serializers for V0 MQ Engine (#15009)
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
ab656f2c2f
commit
3b457143d2
@ -29,6 +29,8 @@ from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT,
|
|||||||
# yapf: enable
|
# yapf: enable
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
|
from vllm.transformers_utils.config import (
|
||||||
|
maybe_register_config_serialize_by_value)
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.worker.model_runner_base import InputProcessingError
|
from vllm.worker.model_runner_base import InputProcessingError
|
||||||
|
|
||||||
@ -42,12 +44,12 @@ class MQLLMEngine:
|
|||||||
"""A multiprocessing wrapper for :class:`LLMEngine`.
|
"""A multiprocessing wrapper for :class:`LLMEngine`.
|
||||||
|
|
||||||
This class is used to wrap the :class:`LLMEngine` class to enable use
|
This class is used to wrap the :class:`LLMEngine` class to enable use
|
||||||
in a concurrent manner. It runs a background loop and uses zeromq to
|
in a concurrent manner. It runs a background loop and uses zeromq to
|
||||||
receive new requests and stream outputs incrementally via ipc.
|
receive new requests and stream outputs incrementally via ipc.
|
||||||
|
|
||||||
The :class:`LLMEngine` generate or encode process is kicked off when a new
|
The :class:`LLMEngine` generate or encode process is kicked off when a new
|
||||||
RPCProcessRequest is received by the input_socket.
|
RPCProcessRequest is received by the input_socket.
|
||||||
|
|
||||||
The self.engine_loop checks the input_socket for new requests,
|
The self.engine_loop checks the input_socket for new requests,
|
||||||
adds them to the LLMEngine if there are any, calls the internal
|
adds them to the LLMEngine if there are any, calls the internal
|
||||||
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
|
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
|
||||||
@ -428,6 +430,9 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
|
|||||||
ipc_path: str, disable_log_stats: bool,
|
ipc_path: str, disable_log_stats: bool,
|
||||||
disable_log_requests: bool, engine_alive):
|
disable_log_requests: bool, engine_alive):
|
||||||
try:
|
try:
|
||||||
|
# Ensure we can serialize transformer config before spawning
|
||||||
|
maybe_register_config_serialize_by_value()
|
||||||
|
|
||||||
engine = MQLLMEngine.from_vllm_config(
|
engine = MQLLMEngine.from_vllm_config(
|
||||||
vllm_config=vllm_config,
|
vllm_config=vllm_config,
|
||||||
usage_context=usage_context,
|
usage_context=usage_context,
|
||||||
|
|||||||
@ -82,6 +82,8 @@ from vllm.entrypoints.openai.serving_transcription import (
|
|||||||
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
|
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
|
||||||
from vllm.entrypoints.utils import load_aware_call, with_cancellation
|
from vllm.entrypoints.utils import load_aware_call, with_cancellation
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
from vllm.transformers_utils.config import (
|
||||||
|
maybe_register_config_serialize_by_value)
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
|
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
|
||||||
is_valid_ipv6_address, set_ulimit)
|
is_valid_ipv6_address, set_ulimit)
|
||||||
@ -221,6 +223,9 @@ async def build_async_engine_client_from_engine_args(
|
|||||||
# so we need to spawn a new process
|
# so we need to spawn a new process
|
||||||
context = multiprocessing.get_context("spawn")
|
context = multiprocessing.get_context("spawn")
|
||||||
|
|
||||||
|
# Ensure we can serialize transformer config before spawning
|
||||||
|
maybe_register_config_serialize_by_value()
|
||||||
|
|
||||||
# The Process can raise an exception during startup, which may
|
# The Process can raise an exception during startup, which may
|
||||||
# not actually result in an exitcode being reported. As a result
|
# not actually result in an exitcode being reported. As a result
|
||||||
# we use a shared variable to communicate the information.
|
# we use a shared variable to communicate the information.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user