[Config] Remove Unused Environment Variable VLLM_DISABLE_PAD_FOR_CUDAGRAPH (#26743)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
commit 6d87a2838c
parent e6cdbd6792
@@ -198,7 +198,6 @@ if TYPE_CHECKING:
     VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False
     VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True
     VLLM_TUNED_CONFIG_FOLDER: str | None = None
-    VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
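For context on the file being edited above: each environment variable is declared twice, once as a typed default under if TYPE_CHECKING: (so static checkers see a plain attribute), and once as a lambda in the environment_variables registry that is resolved lazily through a module-level __getattr__. A minimal sketch of that pattern, simplified and not the exact vLLM source:

    import os
    from collections.abc import Callable
    from typing import TYPE_CHECKING, Any

    if TYPE_CHECKING:
        # Static checkers see a plain typed attribute with a default...
        VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True

    # ...while at runtime each access parses the environment afresh.
    environment_variables: dict[str, Callable[[], Any]] = {
        "VLLM_ALLREDUCE_USE_SYMM_MEM": lambda: bool(
            int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "1"))
        ),
    }

    def __getattr__(name: str) -> Any:
        # PEP 562 module-level __getattr__: defers parsing to first use.
        if name in environment_variables:
            return environment_variables[name]()
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Removing a variable therefore takes two deletions in this file: the typed declaration above and the registry entry in the hunk below.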
@@ -1304,12 +1303,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_ENABLE_CUDAGRAPH_GC": lambda: bool(
         int(os.getenv("VLLM_ENABLE_CUDAGRAPH_GC", "0"))
     ),
-    # Disable padding to CUDA graph capture batch sizes.
-    # TODO(wentao): https://github.com/vllm-project/vllm/issues/23378
-    # After the issue is fixed, we can remove this flag.
-    "VLLM_DISABLE_PAD_FOR_CUDAGRAPH": lambda: bool(
-        int(os.getenv("VLLM_DISABLE_PAD_FOR_CUDAGRAPH", "0"))
-    ),
     # Used to force set up loopback IP
     "VLLM_LOOPBACK_IP": lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
     # Used to set the process name prefix for vLLM processes.
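The deleted getter followed the registry's usual boolean convention, bool(int(os.getenv(name, "0"))): unset or "0" parses to False, whereas a bare bool(os.getenv(...)) would wrongly treat the string "0" as truthy. A self-contained illustration of why the int() round-trip matters (env_flag is a hypothetical helper, not vLLM API):

    import os

    def env_flag(name: str, default: str = "0") -> bool:
        # bool("0") is True, so the int() round-trip is what lets
        # FLAG=0 actually disable a flag.
        return bool(int(os.getenv(name, default)))

    os.environ["DEMO_FLAG"] = "0"
    assert env_flag("DEMO_FLAG") is False
    os.environ["DEMO_FLAG"] = "1"
    assert env_flag("DEMO_FLAG") is True
    assert env_flag("UNSET_FLAG") is False  # falls back to the "0" default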
@@ -2067,7 +2067,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
     def _get_num_input_tokens(self, num_scheduled_tokens: int) -> int:
         if (
             self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
-            and not envs.VLLM_DISABLE_PAD_FOR_CUDAGRAPH
             and hasattr(self, "cudagraph_batch_sizes")
             and self.cudagraph_batch_sizes
             and num_scheduled_tokens <= self.cudagraph_batch_sizes[-1]
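This hunk deletes the flag's only consumer: after the change, the runner always pads the scheduled token count when CUDA graphs are enabled and the count fits within the captured sizes. The padding rounds up to the smallest captured batch size so a pre-captured graph can be replayed. A minimal sketch of that rounding, assuming a sorted cudagraph_batch_sizes list (pad_to_cudagraph_size is an illustrative name, not vLLM's API):

    from bisect import bisect_left

    def pad_to_cudagraph_size(num_tokens: int, batch_sizes: list[int]) -> int:
        # batch_sizes is sorted ascending, e.g. [1, 2, 4, 8, 16, ...].
        # Round up to the smallest captured size that fits; fall back to
        # the raw count when it exceeds the largest captured graph.
        if not batch_sizes or num_tokens > batch_sizes[-1]:
            return num_tokens
        return batch_sizes[bisect_left(batch_sizes, num_tokens)]

    assert pad_to_cudagraph_size(3, [1, 2, 4, 8]) == 4
    assert pad_to_cudagraph_size(9, [1, 2, 4, 8]) == 9  # exceeds all captures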