Revert "Fix non-contiguous input passed to Marlin kernel (#15319)" (#15398)

2026-06-06 02:35:41 +08:00 · 2025-03-24 23:43:51 -04:00 · 2025-03-24 23:43:51 -04:00 · b5269db959
commit b5269db959
parent 6db94571d7
1 changed file with 0 additions and 4 deletions
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -115,10 +115,6 @@ class MarlinLinearKernel(MPLinearKernel):
                      layer: torch.nn.Module,
                      x: torch.Tensor,
                      bias: Optional[torch.Tensor] = None) -> torch.Tensor:
        # marlin requires contiguous memory layout
        # prefix caching may cause x to be non-contiguous
        x = x.contiguous()  # no-op if already contiguous
        c = self.config
        w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)