From 44a595f6d653ee5ee289e51f17ef758b6e6842f6 Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Mon, 2 Jun 2025 19:13:27 +0000
Subject: [PATCH] config format

Signed-off-by: Sage Moore
---
 vllm/config.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 08de3fe32710e..658eaf4b48fa1 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1740,14 +1740,14 @@ class ParallelConfig:
 
     rank: int = 0
     """Global rank in distributed setup."""
-    
+
     enable_microbatching: bool = False
     """Enable microbatching for the model executor."""
-    
+
     always_microbatch_if_enabled: bool = True
-    """Always microbatch if microbatching is enabled. Easier to sync bewteen
+    """Always microbatch if microbatching is enabled. Easier to sync between
     dp workers."""
-    
+
     microbatching_token_threshold: int = 4
     """The threshold for microbatching. If the number of tokens in the
     request is greater than this threshold, microbatching will be used.
@@ -4324,16 +4324,16 @@ class VllmConfig:
                     "full_cuda_graph is not supported with "
                     "cascade attention. Disabling cascade attention.")
                 self.model_config.disable_cascade_attn = True
-        
-        if self.parallel_config.enable_microbatching:
+
+        if self.parallel_config.enable_microbatching and \
+            self.compilation_config.level >= CompilationLevel.PIECEWISE:
             # Microbatching is not supported with piecewise compilation yet.
             # More specifically piecewise cuda-graphs
-            if self.compilation_config.level >= CompilationLevel.PIECEWISE:
-                logger.warning_once(
-                    "Piecewise compilation is not supported with "
-                    "microbatching. Disabling piecewiseching compilation.")
-                self.compilation_config.level = CompilationLevel.NO_COMPILATION
-            
+            logger.warning_once(
+                "Piecewise compilation is not supported with "
+                "microbatching. Disabling piecewise compilation.")
+            self.compilation_config.level = CompilationLevel.NO_COMPILATION
+
         if self.model_config and self.model_config.use_mla and \
             not (current_platform.is_cuda() or current_platform.is_rocm()):