diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 730f272b54e7c..21c5e247bc755 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -148,8 +148,8 @@ steps:
   # TODO: create a dedicated test section for multi-GPU example tests
   # when we have multiple distributed example tests
   - pushd ../examples/offline_inference
-  - python3 rlhf.py
-  - RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
+  - VLLM_ENABLE_V1_MULTIPROCESSING=0 python3 rlhf.py
+  - VLLM_ENABLE_V1_MULTIPROCESSING=0 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
   - popd
 
 - label: Metrics, Tracing Test # 10min
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 5d06a86e6950d..dd0a6256379fe 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1459,16 +1459,6 @@ class EngineArgs:
                                recommend_to_remove=False)
             return False
 
-        if self.worker_cls != EngineArgs.worker_cls:
-            _raise_or_fallback(feature_name="--worker-cls",
-                               recommend_to_remove=False)
-            return False
-
-        if self.worker_extension_cls != EngineArgs.worker_extension_cls:
-            _raise_or_fallback(feature_name="--worker-extension-cls",
-                               recommend_to_remove=False)
-            return False
-
         if self.num_scheduler_steps != EngineArgs.num_scheduler_steps:
             _raise_or_fallback(feature_name="--num-scheduler-steps",
                                recommend_to_remove=True)
diff --git a/vllm/utils.py b/vllm/utils.py
index 9e09f0b9f2d94..d87ec44c75fd0 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -2170,6 +2170,11 @@ def _maybe_force_spawn():
     if cuda_is_initialized():
         reason = "CUDA is initialized"
     elif is_in_ray_actor():
+        # Even if we choose to spawn, the subprocess still needs the Ray
+        # address so that it knows how to connect to the Ray cluster. Set it
+        # in os.environ: env vars are inherited by spawned subprocesses.
+        import ray
+        os.environ["RAY_ADDRESS"] = ray.get_runtime_context().gcs_address
         reason = "In a Ray actor and can only be spawned"
 
     if reason is not None: