diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py index 696356ef1e33..bd1964e667d9 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py @@ -163,7 +163,7 @@ class CompressedTensorsLinearTransformMethod(LinearMethodBase): if self.output_transform is not None: for part_id, (start, length) in enumerate(self.partition_ranges): x[:, start : start + length] = self.output_transform( - x[:, start : start + length].contiguous(), part_id=part_id + x[:, start : start + length].clone(), part_id=part_id ) return x