Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-13 22:15:01 +08:00
[CI] Fix Pre-commit Issue (#25497)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
commit 8b8a8afc89 (parent 8bdd8b5c51)
@@ -2367,7 +2367,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         sampling_metadata: SamplingMetadata,
         hidden_states: torch.Tensor,
         sample_hidden_states: torch.Tensor,
-        aux_hidden_states: Optional[torch.Tensor],
+        aux_hidden_states: Optional[list[torch.Tensor]],
         spec_decode_metadata: Optional[SpecDecodeMetadata],
         common_attn_metadata: CommonAttentionMetadata,
     ) -> Union[list[list[int]], torch.Tensor]:
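The only change in this hunk is the annotation: aux_hidden_states is built and consumed as a list of per-layer tensors, so Optional[list[torch.Tensor]] matches actual usage. A minimal sketch of that usage, with hypothetical names and shapes (not the vLLM code itself):

from typing import Optional

import torch


def total_hidden_width(aux_hidden_states: Optional[list[torch.Tensor]]) -> int:
    # Each element is one layer's hidden states; the list annotation documents
    # that and matches the later per-layer slicing and concatenation.
    if aux_hidden_states is None:
        return 0
    return sum(h.shape[-1] for h in aux_hidden_states)


print(total_hidden_width([torch.zeros(2, 16), torch.zeros(2, 32)]))  # 48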
@@ -2387,6 +2387,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         else:
             indices = []
             offset = 0
+            assert spec_decode_metadata is not None
             for num_draft, tokens in zip(
                     spec_decode_metadata.num_draft_tokens,
                     sampled_token_ids):
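The added assert is the usual way to narrow an Optional for mypy before attribute access. A minimal, self-contained sketch of the pattern; the dataclass below is a hypothetical stand-in for SpecDecodeMetadata, not vLLM's class:

from dataclasses import dataclass
from typing import Optional


@dataclass
class _SpecMeta:  # hypothetical stand-in for SpecDecodeMetadata
    num_draft_tokens: list[int]


def count_accepted(meta: Optional[_SpecMeta],
                   sampled_token_ids: list[list[int]]) -> list[int]:
    assert meta is not None  # narrows Optional[_SpecMeta] to _SpecMeta
    out = []
    for num_draft, tokens in zip(meta.num_draft_tokens, sampled_token_ids):
        # e.g. count how many of the draft positions were actually sampled
        out.append(min(num_draft, len(tokens)))
    return out


print(count_accepted(_SpecMeta([2, 3]), [[1, 2, 3], [4]]))  # [2, 1]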
@@ -2437,6 +2438,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             # TODO(woosuk): Support M-RoPE.
             target_positions = self.positions.gpu[:num_scheduled_tokens]
             if self.use_aux_hidden_state_outputs:
+                assert aux_hidden_states is not None
                 target_hidden_states = torch.cat(
                     [h[:num_scheduled_tokens] for h in aux_hidden_states],
                     dim=-1)
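Besides satisfying the type checker, the assert documents that aux hidden states must be present whenever use_aux_hidden_state_outputs is set. A small sketch of the concatenation that follows it, with made-up shapes (3 aux layers, 8 buffered tokens, hidden size 16):

import torch

num_scheduled_tokens = 4
aux_hidden_states = [torch.randn(8, 16) for _ in range(3)]  # assumed shapes

# Take the prefix of scheduled tokens from each layer's buffer and stack the
# layers along the hidden dimension.
target_hidden_states = torch.cat(
    [h[:num_scheduled_tokens] for h in aux_hidden_states], dim=-1)
print(target_hidden_states.shape)  # torch.Size([4, 48])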
@@ -2462,6 +2464,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             # TODO(woosuk): Support M-RoPE.
             target_positions = self.positions.gpu[token_indices]
             if self.use_aux_hidden_state_outputs:
+                assert aux_hidden_states is not None
                 target_hidden_states = torch.cat(
                     [h[token_indices] for h in aux_hidden_states], dim=-1)
             else:
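Same narrowing pattern as the previous hunk; the only difference is that rows are gathered with an index tensor rather than a prefix slice. A tiny sketch with assumed shapes:

import torch

token_indices = torch.tensor([1, 3, 5])  # hypothetical target-token rows
aux_hidden_states = [torch.randn(8, 16) for _ in range(3)]

# h[token_indices] selects the rows being re-scored from each layer, then the
# per-layer selections are concatenated along the hidden dimension.
target_hidden_states = torch.cat(
    [h[token_indices] for h in aux_hidden_states], dim=-1)
print(target_hidden_states.shape)  # torch.Size([3, 48])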
@@ -2897,7 +2900,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             assert not create_mixed_batch
             num_reqs = cdiv(num_tokens, max_query_len)
             assert num_reqs <= max_num_reqs, \
-                "Do not capture num_reqs > max_num_reqs for uniform batch"
+                f"Do not capture num_reqs {num_reqs} > max_num_reqs " \
+                f"{max_num_reqs} for uniform batch. Num tokens: " \
+                f"{num_tokens}, max_query_len: {max_query_len}"
             num_scheduled_tokens_list = [max_query_len] * num_reqs
             if num_tokens % max_query_len != 0:
                 num_scheduled_tokens_list[-1] = num_tokens % max_query_len
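The new f-string message reports the actual values when the capture-size check fails. A sketch of the arithmetic it guards, with made-up numbers and a local cdiv standing in for vllm.utils.cdiv (ceiling division):

def cdiv(a: int, b: int) -> int:
    # Ceiling division, as assumed for vllm.utils.cdiv.
    return (a + b - 1) // b


num_tokens, max_query_len, max_num_reqs = 70, 16, 8  # made-up capture sizes
num_reqs = cdiv(num_tokens, max_query_len)  # 5
assert num_reqs <= max_num_reqs, \
    f"Do not capture num_reqs {num_reqs} > max_num_reqs {max_num_reqs} " \
    f"for uniform batch. Num tokens: {num_tokens}, " \
    f"max_query_len: {max_query_len}"

num_scheduled_tokens_list = [max_query_len] * num_reqs
if num_tokens % max_query_len != 0:
    # The last request absorbs the remainder so the token counts add up.
    num_scheduled_tokens_list[-1] = num_tokens % max_query_len

print(num_scheduled_tokens_list, sum(num_scheduled_tokens_list))
# [16, 16, 16, 16, 6] 70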