Edit EPLB config and fix config `__post_init__`

Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
ilmarkov 2025-11-26 14:11:19 +00:00
parent b8533148ed
commit 30bab971c0
4 changed files with 19 additions and 9 deletions

View File

@ -60,6 +60,10 @@ class EPLBConfig:
Log the balancedness each step of expert parallelism.
This is turned off by default since it will cause communication overhead.
"""
log_balancedness_interval: int = 1
"""
Interval for logging the balancedness.
"""
use_async: bool = False
"""
Whether to use non-blocking EPLB.

View File

@ -549,7 +549,12 @@ class EplbState:
for eplb_model_state in self.model_states.values():
eplb_model_state.expert_load_pass.zero_()
if log_stats:
if (
log_stats
and self.expert_rearrangement_step
% self.parallel_config.eplb_config.log_balancedness_interval
== 0
):
# Sync the expert load pass for each model (main and drafter).
# expert_load_pass: (num_moe_layers, num_physical_experts)
expert_load_pass_list = self._sync_load_pass()
@ -581,9 +586,10 @@ class EplbState:
if ep_group.rank() == 0:
logger.info(
"EPLB step: %d for model %s: avg_tokens=%.2f, "
"EPLB step: %d/%d for model %s: avg_tokens=%.2f, "
"max_tokens=%d, balancedness=%.4f",
self.expert_rearrangement_step,
self.expert_rearrangement_step_interval,
eplb_model_state.model_name,
avg_tokens,
max_tokens,

View File

@ -528,9 +528,6 @@ def rearrange_expert_weights_inplace(
# Max number of layers to group for communication
max_group_layers = envs.VLLM_EPLB_SYNC_MAX_GROUPED_LAYERS
max_group_layers = max(min(max_group_layers, num_moe_layers), 1)
logger.info_once(
f"EPLB Sync: rearrange max_group_layers: {max_group_layers}", scope="global"
)
first_layer_weights = list(expert_weights[0])
# Buffers to hold the expert weights during the exchange.
@ -552,6 +549,9 @@ def rearrange_expert_weights_inplace(
group=ep_group,
)
return
logger.info_once(
f"EPLB Sync: rearrange max_group_layers: {max_group_layers}", scope="global"
)
# NOTE(bowen): We need this synchronize to run, but I don't know why.
# If you figure out the reason, please let me know -- thank you!

View File

@ -419,10 +419,10 @@ class EngineArgs:
)
_api_process_count: int = ParallelConfig._api_process_count
_api_process_rank: int = ParallelConfig._api_process_rank
num_redundant_experts: int = EPLBConfig.num_redundant_experts
eplb_window_size: int = EPLBConfig.window_size
eplb_step_interval: int = EPLBConfig.step_interval
eplb_log_balancedness: bool = EPLBConfig.log_balancedness
num_redundant_experts: int | None = None
eplb_window_size: int | None = None
eplb_step_interval: int | None = None
eplb_log_balancedness: bool | None = None
max_parallel_loading_workers: int | None = (
ParallelConfig.max_parallel_loading_workers
)