[Doc] Fix batch-level DP example (#23325)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
commit 5cc54f7c5b
parent 0c6e40bbaa
@@ -153,13 +153,14 @@ from vllm import LLM
 
 llm = LLM(
     model="Qwen/Qwen2.5-VL-72B-Instruct",
-    # Create two EngineCore instances, one per DP rank
-    data_parallel_size=2,
-    # Within each EngineCore instance:
-    # The vision encoder uses TP=4 (not DP=2) to shard the input data
-    # The language decoder uses TP=4 to shard the weights as usual
     tensor_parallel_size=4,
+    # When mm_encoder_tp_mode="data",
+    # the vision encoder uses TP=4 (not DP=1) to shard the input data,
+    # so the TP size becomes the effective DP size.
+    # Note that this is independent of the DP size for the language decoder, which is used in the expert parallel setting.
     mm_encoder_tp_mode="data",
+    # The language decoder uses TP=4 to shard the weights regardless
+    # of the setting of mm_encoder_tp_mode
 )
 ```
 
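For reference, below is a sketch of how the documentation example reads once this change is applied, assembled from the context and added lines of the hunk above (the `from vllm import LLM` import is taken from the hunk's context header, and the removed/added split is reconstructed from the line counts in the hunk header):

```python
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    tensor_parallel_size=4,
    # When mm_encoder_tp_mode="data",
    # the vision encoder uses TP=4 (not DP=1) to shard the input data,
    # so the TP size becomes the effective DP size.
    # Note that this is independent of the DP size for the language decoder,
    # which is used in the expert parallel setting.
    mm_encoder_tp_mode="data",
    # The language decoder uses TP=4 to shard the weights regardless
    # of the setting of mm_encoder_tp_mode.
)
```

Note that running this configuration as written requires 4 GPUs, one per tensor-parallel rank.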