mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 07:11:19 +08:00
[BugFix][V1] Fix int32 token index overflow when preparing input ids (#16806)
This commit is contained in:
parent
3cde34a4a4
commit
32d4b669d0
@@ -241,10 +241,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
device=self.device)
|
device=self.device)
|
||||||
|
|
||||||
# OPTIMIZATION: Cache the tensors rather than creating them every step.
|
# OPTIMIZATION: Cache the tensors rather than creating them every step.
|
||||||
|
# Keep in int64 to avoid overflow with long context
|
||||||
self.arange_np = np.arange(max(self.max_num_reqs + 1,
|
self.arange_np = np.arange(max(self.max_num_reqs + 1,
|
||||||
self.max_model_len,
|
self.max_model_len,
|
||||||
self.max_num_tokens),
|
self.max_num_tokens),
|
||||||
dtype=np.int32)
|
dtype=np.int64)
|
||||||
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
|
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
|
||||||
# a faster version of creating a new tensor every time. Thus, we should
|
# a faster version of creating a new tensor every time. Thus, we should
|
||||||
# not make any assumptions about the values in these tensors.
|
# not make any assumptions about the values in these tensors.
|
||||||
|
|||||||
@@ -219,7 +219,8 @@ class TPUModelRunner:
|
|||||||
|
|
||||||
# Range tensor with values [0 .. self.max_num_tokens - 1].
|
# Range tensor with values [0 .. self.max_num_tokens - 1].
|
||||||
# Used to initialize positions / context_lens / seq_lens
|
# Used to initialize positions / context_lens / seq_lens
|
||||||
self.arange_np = np.arange(self.max_num_tokens, dtype=np.int32)
|
# Keep in int64 to avoid overflow with long context
|
||||||
|
self.arange_np = np.arange(self.max_num_tokens, dtype=np.int64)
|
||||||
self.num_reqs_paddings = _get_req_paddings(
|
self.num_reqs_paddings = _get_req_paddings(
|
||||||
min_req_size=MIN_NUM_SEQS, max_req_size=self.max_num_reqs)
|
min_req_size=MIN_NUM_SEQS, max_req_size=self.max_num_reqs)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user