[misc] Clean up ray compiled graph type hints (#13731)

This commit is contained in:
Rui Qiao 2025-02-25 00:37:08 -08:00 committed by GitHub
parent 2d87d7d1ac
commit 3173c3b34e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 6 deletions

View File

@ -528,10 +528,18 @@ class RayDistributedExecutor(DistributedExecutorBase):
envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
with InputNode() as input_data:
# Example DAG: PP=2, TP=4
# (ExecuteModelReq, None) -> 0 -> (ExecuteModelReq, IntermediateOutput) -> 4 -> SamplerOutput # noqa: E501
# -> 1 -> (ExecuteModelReq, IntermediateOutput) -> 5 -> SamplerOutput # noqa: E501
# -> 2 -> (ExecuteModelReq, IntermediateOutput) -> 6 -> SamplerOutput # noqa: E501
# -> 3 -> (ExecuteModelReq, IntermediateOutput) -> 7 -> SamplerOutput # noqa: E501
#
# For V0:
# ExecuteModelRequest -> 0 -> (ExecuteModelReq, IntermediateTensors) -> 4 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 1 -> (ExecuteModelReq, IntermediateTensors) -> 5 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 2 -> (ExecuteModelReq, IntermediateTensors) -> 6 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 3 -> (ExecuteModelReq, IntermediateTensors) -> 7 -> SamplerOutput # noqa: E501
#
# For V1:
# SchedulerOutput -> 0 -> (SchedulerOutput, IntermediateTensors) -> 4 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 1 -> (SchedulerOutput, IntermediateTensors) -> 5 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 2 -> (SchedulerOutput, IntermediateTensors) -> 6 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 3 -> (SchedulerOutput, IntermediateTensors) -> 7 -> ModelRunnerOutput # noqa: E501
# All workers in the first TP group will take in the
# ExecuteModelRequest as input.

View File

@ -114,8 +114,11 @@ try:
def execute_model_ray(
self,
scheduler_output: "SchedulerOutput",
) -> "ModelRunnerOutput":
scheduler_output: Union["SchedulerOutput",
Tuple["SchedulerOutput",
"IntermediateTensors"]],
) -> Union["ModelRunnerOutput", Tuple["SchedulerOutput",
"IntermediateTensors"]]:
# this method is used to compile ray CG,
# and it needs a special logic of self.setup_device_if_necessary()
self.setup_device_if_necessary()