mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 00:15:51 +08:00
[Compressed Tensors] Always clone output for compile robustness (#26849)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent
ac3ed5a815
commit
a5464dcf92
@@ -163,7 +163,7 @@ class CompressedTensorsLinearTransformMethod(LinearMethodBase):
         if self.output_transform is not None:
             for part_id, (start, length) in enumerate(self.partition_ranges):
                 x[:, start : start + length] = self.output_transform(
-                    x[:, start : start + length].contiguous(), part_id=part_id
+                    x[:, start : start + length].clone(), part_id=part_id
                 )
 
         return x
Loading…
x
Reference in New Issue
Block a user