mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:34:57 +08:00
[misc] Clean up ray compiled graph type hints (#13731)
This commit is contained in:
parent
2d87d7d1ac
commit
3173c3b34e
@ -528,10 +528,18 @@ class RayDistributedExecutor(DistributedExecutorBase):
|
|||||||
envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
|
envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
|
||||||
with InputNode() as input_data:
|
with InputNode() as input_data:
|
||||||
# Example DAG: PP=2, TP=4
|
# Example DAG: PP=2, TP=4
|
||||||
# (ExecuteModelReq, None) -> 0 -> (ExecuteModelReq, IntermediateOutput) -> 4 -> SamplerOutput # noqa: E501
|
#
|
||||||
# -> 1 -> (ExecuteModelReq, IntermediateOutput) -> 5 -> SamplerOutput # noqa: E501
|
# For V0:
|
||||||
# -> 2 -> (ExecuteModelReq, IntermediateOutput) -> 6 -> SamplerOutput # noqa: E501
|
# ExecuteModelRequest -> 0 -> (ExecuteModelReq, IntermediateTensors) -> 4 -> SamplerOutput # noqa: E501
|
||||||
# -> 3 -> (ExecuteModelReq, IntermediateOutput) -> 7 -> SamplerOutput # noqa: E501
|
# ExecuteModelRequest -> 1 -> (ExecuteModelReq, IntermediateTensors) -> 5 -> SamplerOutput # noqa: E501
|
||||||
|
# ExecuteModelRequest -> 2 -> (ExecuteModelReq, IntermediateTensors) -> 6 -> SamplerOutput # noqa: E501
|
||||||
|
# ExecuteModelRequest -> 3 -> (ExecuteModelReq, IntermediateTensors) -> 7 -> SamplerOutput # noqa: E501
|
||||||
|
#
|
||||||
|
# For V1:
|
||||||
|
# SchedulerOutput -> 0 -> (SchedulerOutput, IntermediateTensors) -> 4 -> ModelRunnerOutput # noqa: E501
|
||||||
|
# SchedulerOutput -> 1 -> (SchedulerOutput, IntermediateTensors) -> 5 -> ModelRunnerOutput # noqa: E501
|
||||||
|
# SchedulerOutput -> 2 -> (SchedulerOutput, IntermediateTensors) -> 6 -> ModelRunnerOutput # noqa: E501
|
||||||
|
# SchedulerOutput -> 3 -> (SchedulerOutput, IntermediateTensors) -> 7 -> ModelRunnerOutput # noqa: E501
|
||||||
|
|
||||||
# All workers in the first TP group will take in the
|
# All workers in the first TP group will take in the
|
||||||
# ExecuteModelRequest as input.
|
# ExecuteModelRequest as input.
|
||||||
|
|||||||
@ -114,8 +114,11 @@ try:
|
|||||||
|
|
||||||
def execute_model_ray(
|
def execute_model_ray(
|
||||||
self,
|
self,
|
||||||
scheduler_output: "SchedulerOutput",
|
scheduler_output: Union["SchedulerOutput",
|
||||||
) -> "ModelRunnerOutput":
|
Tuple["SchedulerOutput",
|
||||||
|
"IntermediateTensors"]],
|
||||||
|
) -> Union["ModelRunnerOutput", Tuple["SchedulerOutput",
|
||||||
|
"IntermediateTensors"]]:
|
||||||
# This method is used to compile the Ray compiled graph (CG),
|
# This method is used to compile the Ray compiled graph (CG),
|
||||||
# and it needs the special setup step self.setup_device_if_necessary().
|
# and it needs the special setup step self.setup_device_if_necessary().
|
||||||
self.setup_device_if_necessary()
|
self.setup_device_if_necessary()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user