mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:45:01 +08:00
Fix V1 engine serialization error with Ray distributed executor (#26148)
Signed-off-by: Nikhil Ghosh <nikhil@anyscale.com>
This commit is contained in:
parent
300a59c4c3
commit
cd9e5b8340
@ -16,6 +16,7 @@ from vllm.logger import init_logger
|
|||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.sequence import ExecuteModelRequest, IntermediateTensors
|
from vllm.sequence import ExecuteModelRequest, IntermediateTensors
|
||||||
from vllm.utils import get_ip
|
from vllm.utils import get_ip
|
||||||
|
from vllm.v1.outputs import AsyncModelRunnerOutput
|
||||||
from vllm.v1.worker.worker_base import WorkerWrapperBase
|
from vllm.v1.worker.worker_base import WorkerWrapperBase
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@ -142,6 +143,11 @@ try:
|
|||||||
# but may still be finished requests.
|
# but may still be finished requests.
|
||||||
assert not output or not output.req_ids
|
assert not output or not output.req_ids
|
||||||
output = scheduler_output, None
|
output = scheduler_output, None
|
||||||
|
# Ensure outputs crossing Ray compiled DAG are serializable.
|
||||||
|
# AsyncModelRunnerOutput holds CUDA events and cannot be
|
||||||
|
# pickled.
|
||||||
|
if isinstance(output, AsyncModelRunnerOutput):
|
||||||
|
output = output.get_output()
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def override_env_vars(self, vars: Dict[str, str]):
|
def override_env_vars(self, vars: Dict[str, str]):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user