mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-06 02:35:41 +08:00
This commit is contained in:
parent
6db94571d7
commit
b5269db959
@ -115,10 +115,6 @@ class MarlinLinearKernel(MPLinearKernel):
|
|||||||
layer: torch.nn.Module,
|
layer: torch.nn.Module,
|
||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
|
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
|
||||||
# marlin requires contiguous memory layout
|
|
||||||
# prefix caching may cause x to be non-contiguous
|
|
||||||
x = x.contiguous() # no-op if already contiguous
|
|
||||||
|
|
||||||
c = self.config
|
c = self.config
|
||||||
w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
|
w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user