From b5269db959e8f33a89b6d4670f688bf8033aa52b Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Mon, 24 Mar 2025 23:43:51 -0400
Subject: [PATCH] Revert "Fix non-contiguous input passed to Marlin kernel
 (#15319)" (#15398)

---
 .../layers/quantization/kernels/mixed_precision/marlin.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py b/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
index b030e1484a6ad..e21801cf6a785 100644
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -115,10 +115,6 @@ class MarlinLinearKernel(MPLinearKernel):
                       layer: torch.nn.Module,
                       x: torch.Tensor,
                       bias: Optional[torch.Tensor] = None) -> torch.Tensor:
-        # marlin requires contiguous memory layout
-        # prefix caching may cause x to be non-contiguous
-        x = x.contiguous()  # no-op if already contiguous
-
         c = self.config
         w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
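
Note (not part of the patch): below is a minimal, standalone sketch of the contiguity behavior the reverted lines relied on, using only plain PyTorch. The variable names (view, dense, already) are illustrative and do not appear in vllm. Tensor.contiguous() copies a non-contiguous view into a dense layout and returns the same tensor when it is already contiguous, which is why the removed call was effectively a no-op on the common path.

# Illustrative sketch, assuming plain PyTorch; not part of the reverted code.
import torch

x = torch.randn(4, 8)
view = x.t()                      # transposing produces a non-contiguous view
assert not view.is_contiguous()

dense = view.contiguous()         # materializes a contiguous copy
assert dense.is_contiguous()

already = x.contiguous()          # no-op: returns the same underlying storage
assert already.data_ptr() == x.data_ptr()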