config format

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore 2025-06-02 19:13:27 +00:00
parent 92e0cc79a8
commit 44a595f6d6

View File

@ -1740,14 +1740,14 @@ class ParallelConfig:
rank: int = 0 rank: int = 0
"""Global rank in distributed setup.""" """Global rank in distributed setup."""
enable_microbatching: bool = False enable_microbatching: bool = False
"""Enable microbatching for the model executor.""" """Enable microbatching for the model executor."""
always_microbatch_if_enabled: bool = True always_microbatch_if_enabled: bool = True
"""Always microbatch if microbatching is enabled. Easier to sync bewteen """Always microbatch if microbatching is enabled. Easier to sync between
dp workers.""" dp workers."""
microbatching_token_threshold: int = 4 microbatching_token_threshold: int = 4
"""The threshold for microbatching. If the number of tokens in the """The threshold for microbatching. If the number of tokens in the
request is greater than this threshold, microbatching will be used. request is greater than this threshold, microbatching will be used.
@ -4324,16 +4324,16 @@ class VllmConfig:
"full_cuda_graph is not supported with " "full_cuda_graph is not supported with "
"cascade attention. Disabling cascade attention.") "cascade attention. Disabling cascade attention.")
self.model_config.disable_cascade_attn = True self.model_config.disable_cascade_attn = True
if self.parallel_config.enable_microbatching: if self.parallel_config.enable_microbatching and \
self.compilation_config.level >= CompilationLevel.PIECEWISE:
# Microbatching is not supported with piecewise compilation yet. # Microbatching is not supported with piecewise compilation yet.
# More specifically piecewise cuda-graphs # More specifically piecewise cuda-graphs
if self.compilation_config.level >= CompilationLevel.PIECEWISE: logger.warning_once(
logger.warning_once( "Piecewise compilation is not supported with "
"Piecewise compilation is not supported with " "microbatching. Disabling piecewiseching compilation.")
"microbatching. Disabling piecewiseching compilation.") self.compilation_config.level = CompilationLevel.NO_COMPILATION
self.compilation_config.level = CompilationLevel.NO_COMPILATION
if self.model_config and self.model_config.use_mla and \ if self.model_config and self.model_config.use_mla and \
not (current_platform.is_cuda() or current_platform.is_rocm()): not (current_platform.is_cuda() or current_platform.is_rocm()):