mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:34:57 +08:00
[Bugfix][EPLB] Prevent user-provided EPLB config from being overwritten with defaults (#29911)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
6fc5841db1
commit
e6f114ac25
@ -22,7 +22,14 @@ def get_model_args(
|
||||
"num_speculative_tokens": 1,
|
||||
"max_model_len": model_max_len,
|
||||
}
|
||||
|
||||
eplb_config = {
|
||||
"num_redundant_experts": tp_size,
|
||||
"window_size": 128,
|
||||
"step_interval": 1024,
|
||||
"log_balancedness": False,
|
||||
}
|
||||
if use_async:
|
||||
eplb_config["use_async"] = True
|
||||
model_args = {
|
||||
"pretrained": model_name,
|
||||
"dtype": "auto",
|
||||
@ -31,15 +38,10 @@ def get_model_args(
|
||||
"gpu_memory_utilization": 0.7,
|
||||
"speculative_config": speculative_config,
|
||||
"enable_expert_parallel": True,
|
||||
"num_redundant_experts": tp_size,
|
||||
"eplb_window_size": 128,
|
||||
"eplb_step_interval": 1024,
|
||||
"eplb_log_balancedness": False,
|
||||
"eplb_config": eplb_config,
|
||||
"enable_eplb": True,
|
||||
"max_model_len": model_max_len,
|
||||
}
|
||||
if use_async:
|
||||
model_args["eplb_config"] = {"use_async": True}
|
||||
return model_args
|
||||
|
||||
|
||||
|
||||
@ -421,10 +421,6 @@ class EngineArgs:
|
||||
)
|
||||
_api_process_count: int = ParallelConfig._api_process_count
|
||||
_api_process_rank: int = ParallelConfig._api_process_rank
|
||||
num_redundant_experts: int = EPLBConfig.num_redundant_experts
|
||||
eplb_window_size: int = EPLBConfig.window_size
|
||||
eplb_step_interval: int = EPLBConfig.step_interval
|
||||
eplb_log_balancedness: bool = EPLBConfig.log_balancedness
|
||||
max_parallel_loading_workers: int | None = (
|
||||
ParallelConfig.max_parallel_loading_workers
|
||||
)
|
||||
@ -1582,16 +1578,6 @@ class EngineArgs:
|
||||
)
|
||||
self.disable_nccl_for_dp_synchronization = True
|
||||
|
||||
# Forward the deprecated CLI args to the EPLB config.
|
||||
if self.num_redundant_experts is not None:
|
||||
self.eplb_config.num_redundant_experts = self.num_redundant_experts
|
||||
if self.eplb_window_size is not None:
|
||||
self.eplb_config.window_size = self.eplb_window_size
|
||||
if self.eplb_step_interval is not None:
|
||||
self.eplb_config.step_interval = self.eplb_step_interval
|
||||
if self.eplb_log_balancedness is not None:
|
||||
self.eplb_config.log_balancedness = self.eplb_log_balancedness
|
||||
|
||||
parallel_config = ParallelConfig(
|
||||
pipeline_parallel_size=self.pipeline_parallel_size,
|
||||
tensor_parallel_size=self.tensor_parallel_size,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user