mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 23:47:10 +08:00
fix num_tokens_across_dp sizing issue
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
44ead56ad5
commit
e526b1c091
@@ -2692,8 +2692,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
 assert num_reqs == num_tokens
 assert num_tokens % 2 == 0
 num_tokens_per_ubatch = num_tokens // 2
-num_tokens_across_dp = torch.tensor([num_tokens_per_ubatch] * 2,
-device="cpu",
+dp_size = self.vllm_config.parallel_config.data_parallel_size
+num_tokens_across_dp = torch.tensor([num_tokens_per_ubatch] * dp_size,
+device="cpu",
 dtype=torch.int32)
 ubatch_slices = [(slice(0,
 num_reqs // 2), slice(0, num_tokens // 2)),
Loading…
x
Reference in New Issue
Block a user