mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 06:37:01 +08:00
[Bugfix] Support MLA for CompressedTensorsWNA16 (#13725)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
4a8cfc7551
commit
18e505930d
@@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def get_layer_weight(layer):
|
def get_layer_weight(layer):
|
||||||
if hasattr(layer, "weight"):
|
WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
|
||||||
return layer.weight
|
for attr in WEIGHT_NAMES:
|
||||||
elif hasattr(layer, "qweight"):
|
if hasattr(layer, attr):
|
||||||
return layer.qweight
|
return getattr(layer, attr)
|
||||||
else:
|
raise AttributeError(
|
||||||
raise AttributeError(
|
f"Layer '{layer}' has no recognized weight attribute:"
|
||||||
f"Layer '{layer}' has neither weight nor qweight")
|
f" {WEIGHT_NAMES}.")
|
||||||
|
|
||||||
def get_and_maybe_dequant_weights(layer: LinearBase):
|
def get_and_maybe_dequant_weights(layer: LinearBase):
|
||||||
if not isinstance(layer.quant_method, UnquantizedLinearMethod):
|
if not isinstance(layer.quant_method, UnquantizedLinearMethod):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user