[torch.compile] fix tensor alias (#8982)

This commit is contained in:
youkaichao 2024-09-30 20:40:48 -07:00 committed by GitHub
parent aaccca2b4d
commit 7da2487591
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 3 deletions

View File

@ -103,7 +103,8 @@ class EmbeddingModelRunner(
# a placeholder (it has wide hardware support).
kv_caches = [
torch.tensor([], dtype=torch.float32, device=self.device)
] * num_layers
for _ in range(num_layers)
]
execute_model_kwargs = {
"input_ids":

View File

@ -348,7 +348,8 @@ class EncoderDecoderModelRunner(GPUModelRunnerBase[EncoderDecoderModelInput]):
# a placeholder (it has wide hardware support).
kv_caches = [
torch.tensor([], dtype=torch.float32, device=self.device)
] * num_layers
for _ in range(num_layers)
]
finished_requests_ids = [seq.request_id for seq in seqs]
model_input = self.prepare_model_input(
seqs, finished_requests_ids=finished_requests_ids)

View File

@ -1244,9 +1244,13 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
# it by reference, rather by specializing on the value ``None``.
# the `dtype` argument does not matter, and we use `float32` as
# a placeholder (it has wide hardware support).
# it is important to create tensors inside the loop, rather than
# multiplying the list, to avoid Dynamo from treating them as
# tensor aliasing.
kv_caches = [
torch.tensor([], dtype=torch.float32, device=self.device)
] * num_layers
for _ in range(num_layers)
]
finished_requests_ids = [seq.request_id for seq in seqs]
model_input = self.prepare_model_input(
seqs, finished_requests_ids=finished_requests_ids)