From 8625dee2c04eda7b1b091fbf1e2c511d0e4915ec Mon Sep 17 00:00:00 2001
From: yurekami
Date: Wed, 24 Dec 2025 04:03:23 +0900
Subject: [PATCH] [Bug] Fix Qwen3-VL 2:4 sparsity shape mismatch during
 decompression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the shape tensor loaded from the model checkpoint instead of
computing it from layer attributes (logical_widths,
input_size_per_partition), which may not be properly initialized for
some layer types like RowParallelLinear in vision models.

Fixes #31019

Signed-off-by: yurekami

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../schemes/compressed_tensors_24.py          | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
index 571ce267f3fa6..bc4a861b0bfdf 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
@@ -379,13 +379,15 @@ class CompressedTensors24(CompressedTensorsScheme):
             ]
             decompressed = combine_shards(decompressed_shards)
         else:
+            # Use the shape loaded from the model checkpoint rather than
+            # computing from layer attributes, as some layer types (e.g.,
+            # RowParallelLinear in vision models) may not have logical_widths
+            # or input_size_per_partition properly initialized.
+            out_dim, in_dim = layer.shape.data.view(-1).tolist()
             decompressed = sparsity_compressor.decompress_weight(
                 dict(
                     compressed=compressed,
-                    shape=(
-                        layer.logical_widths[0],
-                        layer.input_size_per_partition,
-                    ),
+                    shape=(out_dim, in_dim),
                     bitmask=bitmask,
                 )
             )