From a5464dcf92bba8dfd052fc79bfc40e08aee515d9 Mon Sep 17 00:00:00 2001
From: Kyle Sayers <kylesayrs@gmail.com>
Date: Thu, 16 Oct 2025 15:29:59 -0400
Subject: [PATCH] [Compressed Tensors] Always clone output for compile
 robustness (#26849)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
---
 .../layers/quantization/compressed_tensors/transform/linear.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
index 696356ef1e33..bd1964e667d9 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
@@ -163,7 +163,7 @@ class CompressedTensorsLinearTransformMethod(LinearMethodBase):
         if self.output_transform is not None:
             for part_id, (start, length) in enumerate(self.partition_ranges):
                 x[:, start : start + length] = self.output_transform(
-                    x[:, start : start + length].contiguous(), part_id=part_id
+                    x[:, start : start + length].clone(), part_id=part_id
                 )
 
         return x