mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 15:17:02 +08:00
[Bug] Fix R1 Accuracy 0 Bug (#23294)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
parent
f8ce022948
commit
48bfb0c9b7
@@ -1099,8 +1099,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                 apply_router_weight_on_input=apply_router_weight_on_input,
             )
         else:
-            from vllm.model_executor.layers.fused_moe import fused_experts
-            return fused_experts(
+            common_kwargs = dict(
                 hidden_states=x,
                 w1=layer.w13_weight,
                 w2=layer.w2_weight,
@@ -1117,11 +1116,20 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                           if self.block_quant else layer.w2_weight_scale),
                 a1_scale=layer.w13_input_scale,
                 a2_scale=layer.w2_input_scale,
-                use_fp8_w8a8=True,
-                block_shape=self.quant_config.weight_block_size,
-                allow_deep_gemm=self.allow_deep_gemm,
-                allow_cutlass_block_scaled_grouped_gemm=(
-                    self.allow_cutlass_block_scaled_grouped_gemm))
+            )
+
+            if self.fused_experts is not None:
+                return self.fused_experts(**common_kwargs)
+            else:
+                from vllm.model_executor.layers.fused_moe import fused_experts
+                return fused_experts(
+                    **common_kwargs,
+                    use_fp8_w8a8=True,
+                    block_shape=self.quant_config.weight_block_size,
+                    allow_deep_gemm=self.allow_deep_gemm,
+                    allow_cutlass_block_scaled_grouped_gemm=(
+                        self.allow_cutlass_block_scaled_grouped_gemm),
+                )
 
 
 class Fp8KVCacheMethod(BaseKVCacheMethod):
Loading…
x
Reference in New Issue
Block a user