mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 03:15:20 +08:00
[V1] Unify VLLM_ENABLE_V1_MULTIPROCESSING handling in RayExecutor (#11472)
This commit is contained in:
parent
3f3e92e1f2
commit
9832e5572a
@ -127,11 +127,6 @@ def test_models_distributed(
|
|||||||
if attention_backend:
|
if attention_backend:
|
||||||
os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend
|
os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend
|
||||||
|
|
||||||
# Import VLLM_USE_V1 dynamically to handle patching
|
|
||||||
from vllm.envs import VLLM_USE_V1
|
|
||||||
if VLLM_USE_V1 and distributed_executor_backend != "mp":
|
|
||||||
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
|
|
||||||
|
|
||||||
dtype = "half"
|
dtype = "half"
|
||||||
max_tokens = 5
|
max_tokens = 5
|
||||||
|
|
||||||
|
|||||||
@ -21,7 +21,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
|
|||||||
from vllm.v1.engine.detokenizer import Detokenizer
|
from vllm.v1.engine.detokenizer import Detokenizer
|
||||||
from vllm.v1.engine.processor import Processor
|
from vllm.v1.engine.processor import Processor
|
||||||
from vllm.v1.executor.abstract import Executor
|
from vllm.v1.executor.abstract import Executor
|
||||||
from vllm.v1.executor.ray_utils import initialize_ray_cluster
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
@ -112,7 +111,6 @@ class LLMEngine:
|
|||||||
distributed_executor_backend = (
|
distributed_executor_backend = (
|
||||||
vllm_config.parallel_config.distributed_executor_backend)
|
vllm_config.parallel_config.distributed_executor_backend)
|
||||||
if distributed_executor_backend == "ray":
|
if distributed_executor_backend == "ray":
|
||||||
initialize_ray_cluster(vllm_config.parallel_config)
|
|
||||||
from vllm.v1.executor.ray_executor import RayExecutor
|
from vllm.v1.executor.ray_executor import RayExecutor
|
||||||
executor_class = RayExecutor
|
executor_class = RayExecutor
|
||||||
elif distributed_executor_backend == "mp":
|
elif distributed_executor_backend == "mp":
|
||||||
|
|||||||
@ -8,7 +8,8 @@ from vllm.config import VllmConfig
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.utils import get_distributed_init_method, get_ip, get_open_port
|
from vllm.utils import get_distributed_init_method, get_ip, get_open_port
|
||||||
from vllm.v1.executor.abstract import Executor
|
from vllm.v1.executor.abstract import Executor
|
||||||
from vllm.v1.executor.ray_utils import RayWorkerWrapper, ray
|
from vllm.v1.executor.ray_utils import (RayWorkerWrapper,
|
||||||
|
initialize_ray_cluster, ray)
|
||||||
from vllm.v1.outputs import ModelRunnerOutput
|
from vllm.v1.outputs import ModelRunnerOutput
|
||||||
|
|
||||||
if ray is not None:
|
if ray is not None:
|
||||||
@ -33,7 +34,9 @@ class RayExecutor(Executor):
|
|||||||
if ray_usage != "1":
|
if ray_usage != "1":
|
||||||
os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
|
os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
|
||||||
|
|
||||||
|
initialize_ray_cluster(self.parallel_config)
|
||||||
placement_group = self.parallel_config.placement_group
|
placement_group = self.parallel_config.placement_group
|
||||||
|
|
||||||
# Create the parallel GPU workers.
|
# Create the parallel GPU workers.
|
||||||
self._init_workers_ray(placement_group)
|
self._init_workers_ray(placement_group)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user