[Bug] Fix Layer weight_block_size Assertion Issue (#24674)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Wentao Ye 2025-09-11 19:47:59 -04:00 committed by GitHub
parent 7a30fa8708
commit fcba05c435

@@ -450,10 +450,10 @@ class Fp8LinearMethod(LinearMethodBase):
             # Activations not quantized for marlin.
             del layer.input_scale
-        # On B200, if E8M0 for DeepGemm is used, we need to
+        # On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to
         # requantize the weight and input to the specific scale
         # at the same time.
-        if is_deep_gemm_e8m0_used():
+        if is_deep_gemm_e8m0_used() and self.block_quant:
             assert layer.weight_block_size is not None
             block_sz = tuple(layer.weight_block_size)
             requant_weight_ue8m0_inplace(
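
For context, here is a minimal, self-contained sketch of the guard this hunk introduces. `maybe_requant_block_size` is a hypothetical stand-in, not vLLM code; the assumption (consistent with the fix) is that `layer.weight_block_size` is only populated for block-quantized FP8 checkpoints, so the previously unconditional assert fired for non-block-quantized layers whenever the DeepGEMM E8M0 path was active.

```python
from typing import Optional, Tuple


def maybe_requant_block_size(
    e8m0_used: bool,
    block_quant: bool,
    weight_block_size: Optional[Tuple[int, int]],
) -> Optional[Tuple[int, ...]]:
    """Return the block size to requantize with, or None to skip requantization."""
    # Fixed behavior: the E8M0 requantization only applies to block-quantized
    # layers, so layers without a weight_block_size are skipped cleanly
    # instead of tripping the assertion.
    if e8m0_used and block_quant:
        assert weight_block_size is not None
        return tuple(weight_block_size)
    return None


# Non-block-quantized FP8 layer with the E8M0 path enabled:
# previously asserted, now skipped.
assert maybe_requant_block_size(True, block_quant=False, weight_block_size=None) is None

# Block-quantized FP8 layer (e.g. 128x128 blocks): requantization proceeds as before.
assert maybe_requant_block_size(True, block_quant=True, weight_block_size=(128, 128)) == (128, 128)
```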
@@ -905,7 +905,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
         del layer.w13_input_scale
         del layer.w2_input_scale
-        if is_deep_gemm_e8m0_used():
+        if is_deep_gemm_e8m0_used() and self.block_quant:
             assert layer.weight_block_size is not None
             # Re-quantise the expert weights so their scales are UE8M0.
             block_sz = tuple(layer.weight_block_size)
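
The `Fp8MoEMethod` hunk applies the same guard to the fused-MoE path: expert weights are only re-quantized to UE8M0 scales when the layer is actually block-quantized. Without it, an FP8 MoE checkpoint without block quantization would presumably hit the same `layer.weight_block_size is not None` assertion whenever the DeepGEMM E8M0 path is enabled.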