mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-06 17:44:01 +08:00
[BugFix] Skip the Q component for QKVParallelLinear in the case of QKVCrossParallelLinear since its width is 0 (#22369)
Signed-off-by: sstamenk <sstamenk@amd.com>
This commit is contained in:
parent
1c859a1387
commit
6b04039a72
@@ -121,6 +121,9 @@ def requantize_with_max_scale(
|
||||
if unfused_module_in_checkpoint:
|
||||
start = 0
|
||||
for idx, logical_width in enumerate(logical_widths):
|
||||
# Skip any component with zero width.
|
||||
if logical_width == 0:
|
||||
continue
|
||||
end = start + logical_width
|
||||
weight_dq = per_tensor_dequantize(weight[start:end, :],
|
||||
weight_scale[idx])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user