From 8625dee2c04eda7b1b091fbf1e2c511d0e4915ec Mon Sep 17 00:00:00 2001
From: yurekami
Date: Wed, 24 Dec 2025 04:03:23 +0900
Subject: [PATCH] [Bug] Fix Qwen3-VL 2:4 sparsity shape mismatch during
 decompression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the shape tensor loaded from the model checkpoint instead of
computing it from layer attributes (logical_widths,
input_size_per_partition), which may not be properly initialized for
some layer types like RowParallelLinear in vision models.

Fixes #31019

Signed-off-by: yurekami

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../schemes/compressed_tensors_24.py          | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
index 571ce267f3fa6..bc4a861b0bfdf 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
@@ -379,13 +379,15 @@ class CompressedTensors24(CompressedTensorsScheme):
             ]
             decompressed = combine_shards(decompressed_shards)
         else:
+            # Use the shape loaded from the model checkpoint rather than
+            # computing from layer attributes, as some layer types (e.g.,
+            # RowParallelLinear in vision models) may not have logical_widths
+            # or input_size_per_partition properly initialized.
+            out_dim, in_dim = layer.shape.data.view(-1).tolist()
             decompressed = sparsity_compressor.decompress_weight(
                 dict(
                     compressed=compressed,
-                    shape=(
-                        layer.logical_widths[0],
-                        layer.input_size_per_partition,
-                    ),
+                    shape=(out_dim, in_dim),
                     bitmask=bitmask,
                 )
             )