diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 730f272b54e7c..21c5e247bc755 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -148,8 +148,8 @@ steps: # TODO: create a dedicated test section for multi-GPU example tests # when we have multiple distributed example tests - pushd ../examples/offline_inference - - python3 rlhf.py - - RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py + - VLLM_ENABLE_V1_MULTIPROCESSING=0 python3 rlhf.py + - VLLM_ENABLE_V1_MULTIPROCESSING=0 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py - popd - label: Metrics, Tracing Test # 10min diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5d06a86e6950d..dd0a6256379fe 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1459,16 +1459,6 @@ class EngineArgs: recommend_to_remove=False) return False - if self.worker_cls != EngineArgs.worker_cls: - _raise_or_fallback(feature_name="--worker-cls", - recommend_to_remove=False) - return False - - if self.worker_extension_cls != EngineArgs.worker_extension_cls: - _raise_or_fallback(feature_name="--worker-extension-cls", - recommend_to_remove=False) - return False - if self.num_scheduler_steps != EngineArgs.num_scheduler_steps: _raise_or_fallback(feature_name="--num-scheduler-steps", recommend_to_remove=True) diff --git a/vllm/utils.py b/vllm/utils.py index 9e09f0b9f2d94..d87ec44c75fd0 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -2170,6 +2170,11 @@ def _maybe_force_spawn(): if cuda_is_initialized(): reason = "CUDA is initialized" elif is_in_ray_actor(): + # even if we choose to spawn, we need to pass the ray address + # to the subprocess so that it knows how to connect to the ray cluster. + # env vars are inherited by subprocesses, even if we use spawn. + import ray + os.environ["RAY_ADDRESS"] = ray.get_runtime_context().gcs_address reason = "In a Ray actor and can only be spawned" if reason is not None: