[Bugfix] Support MLA for CompressedTensorsWNA16 (#13725)
Signed-off-by: mgoin <mgoin64@gmail.com>
parent 4a8cfc7551
commit 18e505930d
@@ -1130,13 +1130,13 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
             )
 
         def get_layer_weight(layer):
-            if hasattr(layer, "weight"):
-                return layer.weight
-            elif hasattr(layer, "qweight"):
-                return layer.qweight
-            else:
-                raise AttributeError(
-                    f"Layer '{layer}' has neither weight nor qweight")
+            WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
+            for attr in WEIGHT_NAMES:
+                if hasattr(layer, attr):
+                    return getattr(layer, attr)
+            raise AttributeError(
+                f"Layer '{layer}' has no recognized weight attribute:"
+                f" {WEIGHT_NAMES}.")
 
         def get_and_maybe_dequant_weights(layer: LinearBase):
             if not isinstance(layer.quant_method, UnquantizedLinearMethod):
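Below is a minimal, standalone sketch (not part of the commit) of why the generalized lookup is needed: CompressedTensorsWNA16 layers expose their packed weight as weight_packed rather than weight or qweight, so the old helper raised AttributeError for them. The FakeWNA16Layer class and its tensor shape are hypothetical stand-ins used only for illustration; only the attribute names in WEIGHT_NAMES come from the commit.

    # Standalone sketch: the generalized helper from this commit, exercised
    # against a hypothetical stand-in layer. Only the attribute names in
    # WEIGHT_NAMES reflect vLLM; the layer class below is illustrative.
    import torch


    def get_layer_weight(layer):
        WEIGHT_NAMES = ("weight", "qweight", "weight_packed")
        for attr in WEIGHT_NAMES:
            if hasattr(layer, attr):
                return getattr(layer, attr)
        raise AttributeError(
            f"Layer '{layer}' has no recognized weight attribute:"
            f" {WEIGHT_NAMES}.")


    class FakeWNA16Layer:
        # Hypothetical stand-in: a WNA16-quantized linear layer stores its
        # packed integer weight under `weight_packed`.
        def __init__(self):
            self.weight_packed = torch.zeros((128, 16), dtype=torch.int32)


    layer = FakeWNA16Layer()
    # Resolves to layer.weight_packed; the previous if/elif helper raised
    # AttributeError for this layer type.
    print(get_layer_weight(layer).shape)

Iterating over a tuple of known attribute names keeps the helper open to further quantization schemes without adding another if/elif branch, and the error message now lists every name that was tried.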