temporarily remove enable_microbatching

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore 2025-06-02 19:28:58 +00:00
parent e34e4411b9
commit 919eef995b

View File

@ -1741,9 +1741,6 @@ class ParallelConfig:
rank: int = 0
"""Global rank in distributed setup."""
enable_microbatching: bool = False
"""Enable microbatching for the model executor."""
always_microbatch_if_enabled: bool = True
"""Always microbatch if microbatching is enabled. Easier to sync between
dp workers."""
@ -4325,16 +4322,6 @@ class VllmConfig:
"cascade attention. Disabling cascade attention.")
self.model_config.disable_cascade_attn = True
if self.parallel_config.enable_microbatching and \
self.compilation_config.level >= CompilationLevel.PIECEWISE:
# Microbatching is not supported with piecewise compilation yet.
# More specifically, piecewise CUDA graphs are not supported.
logger.warning_once(
"Piecewise compilation is not supported with "
"microbatching. Disabling piecewise compilation.")
self.compilation_config.level = CompilationLevel.NO_COMPILATION
if self.model_config and self.model_config.use_mla and \
not (current_platform.is_cuda() or current_platform.is_rocm()):
logger.info(