mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-30 10:38:45 +08:00
[Model] Qwen3 Dense FP8 Compat Fixes (#17318)
Signed-off-by: simon-mo <xmo@berkeley.edu>
This commit is contained in:
parent
ed2462030f
commit
dcbac4cb4b
@ -929,6 +929,15 @@ class QKVParallelLinear(ColumnParallelLinear):
|
||||
shard_offset = self._get_shard_offset_mapping(loaded_shard_id)
|
||||
shard_size = self._get_shard_size_mapping(loaded_shard_id)
|
||||
|
||||
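# Note(simon): This is needed for Qwen3's fp8 quantization: for block-quantized scale params, shard_offset and shard_size are ceil-divided by block_n (weight_block_size[0]).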
|
||||
if isinstance(param, BlockQuantScaleParameter):
|
||||
assert self.quant_method is not None
|
||||
assert hasattr(self.quant_method, "quant_config")
|
||||
weight_block_size = self.quant_method.quant_config.weight_block_size
|
||||
block_n, _ = weight_block_size[0], weight_block_size[1]
|
||||
shard_offset = (shard_offset + block_n - 1) // block_n
|
||||
shard_size = (shard_size + block_n - 1) // block_n
|
||||
|
||||
param.load_qkv_weight(loaded_weight=loaded_weight,
|
||||
num_heads=self.num_kv_head_replicas,
|
||||
shard_id=loaded_shard_id,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user