Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 19:35:01 +08:00)
[feat] move WEIGHT_SCALE_SUPPORTED into raise block to accelerate RLHF weight loading (#21164)
Signed-off-by: huangweixiao <huangweixiao@msh.team>
This commit is contained in:
parent a7b8788d2c
commit c1b4eb048a
@@ -1079,9 +1079,6 @@ class FusedMoE(torch.nn.Module):
             raise ValueError(f"shard_id must be ['w1','w2','w3'] but "
                              f"got {shard_id}.")
 
-        WEIGHT_SCALE_SUPPORTED = [
-            e.value for e in FusedMoeWeightScaleSupported
-        ]
         # Fetch the dim to shard the parameter/loaded weight
         # based on the shard id. This will be whatever
         # dimension intermediate_size_per_partition is used.
@@ -1230,6 +1227,9 @@ class FusedMoE(torch.nn.Module):
                                                    loaded_weight=loaded_weight,
                                                    expert_id=expert_id)
             else:
+                WEIGHT_SCALE_SUPPORTED = [
+                    e.value for e in FusedMoeWeightScaleSupported
+                ]
                 raise ValueError(
                     f"quant method must be one of {WEIGHT_SCALE_SUPPORTED}")
             return True if return_success else None
Loading…
x
Reference in New Issue
Block a user