Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-01-07 18:43:04 +08:00)
[Perf][Async Scheduling] Remove CPU->GPU sync in dummy_run (#27455)
Signed-off-by: Lehua Ding <lehuading@tencent.com>
This commit is contained in:
parent 17af6aa0da
commit 0402428200
@@ -3492,7 +3492,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
self.eplb_step(is_dummy=True, is_profile=is_profile)
|
||||
|
||||
  logit_indices = np.cumsum(num_scheduled_tokens) - 1
- return hidden_states, hidden_states[logit_indices]
+ logit_indices_device = torch.from_numpy(logit_indices).to(
+     self.device, non_blocking=True
+ )
+ return hidden_states, hidden_states[logit_indices_device]
|
||||
|
||||
@torch.inference_mode()
|
||||
def _dummy_sampler_run(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user