From 6d87a2838cfbddbfd0a13df6998e0e35bd816703 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 14 Oct 2025 11:47:49 -0400
Subject: [PATCH] [Config] Remove Unused Environment Variable
 `VLLM_DISABLE_PAD_FOR_CUDAGRAPH` (#26743)

Signed-off-by: yewentao256
---
 vllm/envs.py                       | 7 -------
 vllm/v1/worker/gpu_model_runner.py | 1 -
 2 files changed, 8 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index d93ae8b9c225..8d8c96297d91 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -198,7 +198,6 @@ if TYPE_CHECKING:
     VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False
     VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True
     VLLM_TUNED_CONFIG_FOLDER: str | None = None
-    VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
@@ -1304,12 +1303,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_ENABLE_CUDAGRAPH_GC": lambda: bool(
         int(os.getenv("VLLM_ENABLE_CUDAGRAPH_GC", "0"))
     ),
-    # Disable padding to CUDA graph capture batch sizes.
-    # TODO(wentao): https://github.com/vllm-project/vllm/issues/23378
-    # After the issue is fixed, we can remove this flag.
-    "VLLM_DISABLE_PAD_FOR_CUDAGRAPH": lambda: bool(
-        int(os.getenv("VLLM_DISABLE_PAD_FOR_CUDAGRAPH", "0"))
-    ),
     # Used to force set up loopback IP
     "VLLM_LOOPBACK_IP": lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
     # Used to set the process name prefix for vLLM processes.
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index ce174664710b..67a93fed5274 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2067,7 +2067,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
     def _get_num_input_tokens(self, num_scheduled_tokens: int) -> int:
         if (
             self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
-            and not envs.VLLM_DISABLE_PAD_FOR_CUDAGRAPH
             and hasattr(self, "cudagraph_batch_sizes")
             and self.cudagraph_batch_sizes
             and num_scheduled_tokens <= self.cudagraph_batch_sizes[-1]
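
For reference, a minimal sketch of the padding behavior this patch leaves in
place: with the escape hatch removed, _get_num_input_tokens always rounds the
scheduled token count up to the nearest captured CUDA graph batch size
whenever CUDA graphs are enabled, and falls back to the raw count when it
exceeds the largest captured size. The standalone helper below is
hypothetical, not vLLM's actual code; it only assumes cudagraph_batch_sizes
is sorted ascending, as the [-1] upper-bound check in the diff implies.

    import bisect

    def pad_for_cudagraph(num_scheduled_tokens: int,
                          cudagraph_batch_sizes: list[int]) -> int:
        """Round a token count up to the nearest captured batch size."""
        if num_scheduled_tokens > cudagraph_batch_sizes[-1]:
            # Larger than any captured graph: run eagerly, no padding.
            return num_scheduled_tokens
        # Smallest captured size >= num_scheduled_tokens.
        idx = bisect.bisect_left(cudagraph_batch_sizes, num_scheduled_tokens)
        return cudagraph_batch_sizes[idx]

    # Example: with captured sizes [1, 2, 4, 8, 16], 5 tokens pad to 8,
    # while 32 tokens exceed every captured graph and stay unpadded.
    assert pad_for_cudagraph(5, [1, 2, 4, 8, 16]) == 8
    assert pad_for_cudagraph(32, [1, 2, 4, 8, 16]) == 32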