From 47f670b03b7dfb4e1149eb8b14ba9edcfc297255 Mon Sep 17 00:00:00 2001
From: samzong <samzong.lu@gmail.com>
Date: Wed, 17 Sep 2025 22:31:20 +0800
Subject: [PATCH] [Docs] improve code formatting and comments to eliminate
 griffe build warning. (#25010)

Signed-off-by: samzong <samzong.lu@gmail.com>
---
 vllm/benchmarks/serve.py                |  2 +-
 vllm/distributed/eplb/eplb_state.py     |  9 +++++----
 vllm/distributed/eplb/rebalance_algo.py | 23 ++++++++++++++---------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index 33e831e54bbc9..1aeef0fd5bd85 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -139,7 +139,7 @@ async def get_request(
             A lower burstiness value (0 < burstiness < 1) results
             in more bursty requests, while a higher burstiness value
             (burstiness > 1) results in a more uniform arrival of requests.
-         ramp_up_strategy (optional):
+        ramp_up_strategy (optional):
             The ramp-up strategy. Can be "linear" or "exponential".
             If None, uses constant request rate (specified by request_rate).
         ramp_up_start_rps (optional):
diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
index 8f8baa7d59db7..3e318d7848326 100644
--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -337,11 +337,12 @@ class EplbState:
         Args:
             model (MixtureOfExperts): The MoE model.
             is_dummy (bool): If `True`, this is a dummy step and the load
-                metrics recorded in this forward pass will not count. Defaults
-                to `False`.
+                metrics recorded in this forward pass will not count.
+                Defaults to `False`.
             is_profile (bool): If `True`, perform a dummy rearrangement
-                with maximum communication cost. This is used in `profile_run`
-                to reserve enough memory for the communication buffer.
+                with maximum communication cost. This is used in
+                `profile_run` to reserve enough memory
+                for the communication buffer.
             log_stats (bool): If `True`, log the expert load metrics.
 
         # Stats
diff --git a/vllm/distributed/eplb/rebalance_algo.py b/vllm/distributed/eplb/rebalance_algo.py
index 3564a10dfc684..fc43dbe3b6533 100644
--- a/vllm/distributed/eplb/rebalance_algo.py
+++ b/vllm/distributed/eplb/rebalance_algo.py
@@ -109,13 +109,16 @@ def rebalance_experts_hierarchical(
         num_physical_experts: number of physical experts after replication
         num_groups: number of expert groups
         num_nodes: number of server nodes, where the intra-node network 
-            (e.g, NVLink) is faster
+            (e.g., NVLink) is faster
         num_gpus: number of GPUs, must be a multiple of `num_nodes`
 
     Returns:
-        physical_to_logical_map: [num_moe_layers, num_physical_experts]
-        logical_to_physical_map: [num_moe_layers, num_logical_experts, X]
-        logical_count: [num_moe_layers, num_logical_experts]
+        physical_to_logical_map (torch.Tensor):
+            [num_moe_layers, num_physical_experts]
+        logical_to_physical_map (torch.Tensor):
+            [num_moe_layers, num_logical_experts, X]
+        logical_count (torch.Tensor):
+            [num_moe_layers, num_logical_experts]
     """
     num_layers, num_logical_experts = weight.shape
     assert num_logical_experts % num_groups == 0
@@ -197,11 +200,13 @@ def rebalance_experts(
         num_gpus: number of GPUs, must be a multiple of `num_nodes`
 
     Returns:
-        physical_to_logical_map: [layers, num_replicas], the expert index of
-            each replica
-        logical_to_physical_map: [layers, num_logical_experts, X], the replica
-            indices for each expert
-        expert_count: [layers, num_logical_experts], number of physical
+        physical_to_logical_map:
+            [layers, num_replicas], the expert index of each replica
+        logical_to_physical_map:
+            [layers, num_logical_experts, X], the replica indices for each
+            expert
+        expert_count:
+            [layers, num_logical_experts], number of physical
             replicas for each logical expert
     """
     num_layers, num_logical_experts = weight.shape