From c1b4eb048a286ea5e7bcca730ae5676625f06541 Mon Sep 17 00:00:00 2001
From: Weixiao Huang
Date: Mon, 4 Aug 2025 15:43:06 +0800
Subject: [PATCH] [feat] move WEIGHT_SCALE_SUPPORTED into raise block to accelerate RLHF weight loading (#21164)

Signed-off-by: huangweixiao
---
 vllm/model_executor/layers/fused_moe/layer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index c2039adad99c3..9e7296feeae1e 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1079,9 +1079,6 @@ class FusedMoE(torch.nn.Module):
             raise ValueError(f"shard_id must be ['w1','w2','w3'] but "
                              f"got {shard_id}.")
 
-        WEIGHT_SCALE_SUPPORTED = [
-            e.value for e in FusedMoeWeightScaleSupported
-        ]
         # Fetch the dim to shard the parameter/loaded weight
         # based on the shard id. This will be whatever
         # dimension intermediate_size_per_partition is used.
@@ -1230,6 +1227,9 @@ class FusedMoE(torch.nn.Module):
                     loaded_weight=loaded_weight,
                     expert_id=expert_id)
             else:
+                WEIGHT_SCALE_SUPPORTED = [
+                    e.value for e in FusedMoeWeightScaleSupported
+                ]
                 raise ValueError(
                     f"quant method must be one of {WEIGHT_SCALE_SUPPORTED}")
             return True if return_success else None