From a5464dcf92bba8dfd052fc79bfc40e08aee515d9 Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Thu, 16 Oct 2025 15:29:59 -0400
Subject: [PATCH] [Compressed Tensors] Always clone output for compile robustness (#26849)

Signed-off-by: Kyle Sayers
Co-authored-by: Michael Goin
---
 .../layers/quantization/compressed_tensors/transform/linear.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
index 696356ef1e33..bd1964e667d9 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
@@ -163,7 +163,7 @@ class CompressedTensorsLinearTransformMethod(LinearMethodBase):
         if self.output_transform is not None:
             for part_id, (start, length) in enumerate(self.partition_ranges):
                 x[:, start : start + length] = self.output_transform(
-                    x[:, start : start + length].contiguous(), part_id=part_id
+                    x[:, start : start + length].clone(), part_id=part_id
                 )
 
         return x