mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-11 05:05:48 +08:00
[Bug] Fix Qwen3-VL 2:4 sparsity shape mismatch during decompression
Use the shape tensor loaded from the model checkpoint instead of computing from layer attributes (logical_widths, input_size_per_partition) which may not be properly initialized for some layer types like RowParallelLinear in vision models. Fixes #31019 Signed-off-by: yurekami <yurekami@users.noreply.github.com> 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
c016c95b45
commit
8625dee2c0
@@ -379,13 +379,15 @@ class CompressedTensors24(CompressedTensorsScheme):
                 ]
                 decompressed = combine_shards(decompressed_shards)
             else:
+                # Use the shape loaded from the model checkpoint rather than
+                # computing from layer attributes, as some layer types (e.g.,
+                # RowParallelLinear in vision models) may not have logical_widths
+                # or input_size_per_partition properly initialized.
+                out_dim, in_dim = layer.shape.data.view(-1).tolist()
                 decompressed = sparsity_compressor.decompress_weight(
                     dict(
                         compressed=compressed,
-                        shape=(
-                            layer.logical_widths[0],
-                            layer.input_size_per_partition,
-                        ),
+                        shape=(out_dim, in_dim),
                         bitmask=bitmask,
                     )
                 )
Loading…
x
Reference in New Issue
Block a user