From 30bab971c02f34971da93f9834f3789ff48a2511 Mon Sep 17 00:00:00 2001
From: ilmarkov <markovilya197@gmail.com>
Date: Wed, 26 Nov 2025 14:11:19 +0000
Subject: [PATCH] Edit config and fix config post_init

Signed-off-by: ilmarkov <markovilya197@gmail.com>
---
 vllm/config/parallel.py                    |  4 ++++
 vllm/distributed/eplb/eplb_state.py        | 10 ++++++++--
 vllm/distributed/eplb/rebalance_execute.py |  6 +++---
 vllm/engine/arg_utils.py                   |  8 ++++----
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index 7ba1da5db3849..44b89c3d24cbe 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -60,6 +60,10 @@ class EPLBConfig:
     Log the balancedness each step of expert parallelism.
     This is turned off by default since it will cause communication overhead.
     """
+    log_balancedness_interval: int = 1
+    """
+    Interval (in EPLB steps) between balancedness log messages; must be >= 1.
+    """
     use_async: bool = False
     """
     Whether to use non-blocking EPLB.
diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
index c768cc9a0593b..3ee421ed3d1cf 100644
--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -549,7 +549,12 @@ class EplbState:
             for eplb_model_state in self.model_states.values():
                 eplb_model_state.expert_load_pass.zero_()
 
-        if log_stats:
+        if (
+            log_stats
+            and self.expert_rearrangement_step
+            % self.parallel_config.eplb_config.log_balancedness_interval
+            == 0
+        ):
             # Sync the expert load pass for each model (main and drafter).
             # expert_load_pass: (num_moe_layers, num_physical_experts)
             expert_load_pass_list = self._sync_load_pass()
@@ -581,9 +586,10 @@ class EplbState:
 
                 if ep_group.rank() == 0:
                     logger.info(
-                        "EPLB step: %d for model %s: avg_tokens=%.2f, "
+                        "EPLB step: %d/%d for model %s: avg_tokens=%.2f, "
                         "max_tokens=%d, balancedness=%.4f",
                         self.expert_rearrangement_step,
+                        self.expert_rearrangement_step_interval,
                         eplb_model_state.model_name,
                         avg_tokens,
                         max_tokens,
diff --git a/vllm/distributed/eplb/rebalance_execute.py b/vllm/distributed/eplb/rebalance_execute.py
index aa9f77f3ca5c4..5bc111cf02756 100644
--- a/vllm/distributed/eplb/rebalance_execute.py
+++ b/vllm/distributed/eplb/rebalance_execute.py
@@ -528,9 +528,6 @@ def rearrange_expert_weights_inplace(
     # Max number of layers to group for communication
     max_group_layers = envs.VLLM_EPLB_SYNC_MAX_GROUPED_LAYERS
     max_group_layers = max(min(max_group_layers, num_moe_layers), 1)
-    logger.info_once(
-        f"EPLB Sync: rearrange max_group_layers: {max_group_layers}", scope="global"
-    )
 
     first_layer_weights = list(expert_weights[0])
     # Buffers to hold the expert weights during the exchange.
@@ -552,6 +549,9 @@ def rearrange_expert_weights_inplace(
                     group=ep_group,
                 )
         return
+    logger.info_once(
+        f"EPLB Sync: rearrange max_group_layers: {max_group_layers}", scope="global"
+    )
 
     # NOTE(bowen): We need this synchronize to run, but I don't know why.
     # If you figure out the reason, please let me know -- thank you!
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 696ff3a1f4024..8fbfcac7d2cd1 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -419,10 +419,10 @@ class EngineArgs:
     )
     _api_process_count: int = ParallelConfig._api_process_count
     _api_process_rank: int = ParallelConfig._api_process_rank
-    num_redundant_experts: int = EPLBConfig.num_redundant_experts
-    eplb_window_size: int = EPLBConfig.window_size
-    eplb_step_interval: int = EPLBConfig.step_interval
-    eplb_log_balancedness: bool = EPLBConfig.log_balancedness
+    num_redundant_experts: int | None = None
+    eplb_window_size: int | None = None
+    eplb_step_interval: int | None = None
+    eplb_log_balancedness: bool | None = None
     max_parallel_loading_workers: int | None = (
         ParallelConfig.max_parallel_loading_workers
     )