format

Signed-off-by: Reagan <reaganjlee@gmail.com>
2026-07-16 11:57:13 +08:00 · 2025-12-16 17:07:02 -08:00 · 2025-12-16 17:07:02 -08:00 · 6275b3c1b9
commit 6275b3c1b9
parent 169be31b01
1 changed file with 27 additions and 35 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@ -651,11 +651,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "NO_COLOR": lambda: os.getenv("NO_COLOR", "0") != "0",
    # If set, vllm will log stats at this interval in seconds
    # If not set, vllm will log stats every 10 seconds.
-    "VLLM_LOG_STATS_INTERVAL": lambda: (
-        val
-        if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10."))) > 0.0
-        else 10.0
-    ),
+    "VLLM_LOG_STATS_INTERVAL": lambda: val
+    if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10."))) > 0.0
+    else 10.0,
    # Trace function calls
    # If set to 1, vllm will trace function calls
    # Useful for debugging
@ -680,30 +678,28 @@ environment_variables: dict[str, Callable[[], Any]] = {
        ),
    ),
    # If set, vllm will use flashinfer sampler
-    "VLLM_USE_FLASHINFER_SAMPLER": lambda: (
-        bool(int(os.environ["VLLM_USE_FLASHINFER_SAMPLER"]))
-        if "VLLM_USE_FLASHINFER_SAMPLER" in os.environ
-        else None
-    ),
+    "VLLM_USE_FLASHINFER_SAMPLER": lambda: bool(
+        int(os.environ["VLLM_USE_FLASHINFER_SAMPLER"])
+    )
+    if "VLLM_USE_FLASHINFER_SAMPLER" in os.environ
+    else None,
    # Pipeline stage partition strategy
    "VLLM_PP_LAYER_PARTITION": lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
    # (CPU backend only) CPU key-value cache space.
    # default is None and will be set as 4 GB
-    "VLLM_CPU_KVCACHE_SPACE": lambda: (
-        int(os.getenv("VLLM_CPU_KVCACHE_SPACE", "0"))
-        if "VLLM_CPU_KVCACHE_SPACE" in os.environ
-        else None
-    ),
+    "VLLM_CPU_KVCACHE_SPACE": lambda: int(os.getenv("VLLM_CPU_KVCACHE_SPACE", "0"))
+    if "VLLM_CPU_KVCACHE_SPACE" in os.environ
+    else None,
    # (CPU backend only) CPU core ids bound by OpenMP threads, e.g., "0-31",
    # "0,1,2", "0-31,33". CPU cores of different ranks are separated by '|'.
    "VLLM_CPU_OMP_THREADS_BIND": lambda: os.getenv("VLLM_CPU_OMP_THREADS_BIND", "auto"),
    # (CPU backend only) CPU cores not used by OMP threads.
    # Those CPU cores will not be used by OMP threads of a rank.
-    "VLLM_CPU_NUM_OF_RESERVED_CPU": lambda: (
-        int(os.getenv("VLLM_CPU_NUM_OF_RESERVED_CPU", "0"))
-        if "VLLM_CPU_NUM_OF_RESERVED_CPU" in os.environ
-        else None
-    ),
+    "VLLM_CPU_NUM_OF_RESERVED_CPU": lambda: int(
+        os.getenv("VLLM_CPU_NUM_OF_RESERVED_CPU", "0")
+    )
+    if "VLLM_CPU_NUM_OF_RESERVED_CPU" in os.environ
+    else None,
    # (CPU backend only) whether to use SGL kernels, optimized for small batch.
    "VLLM_CPU_SGL_KERNEL": lambda: bool(int(os.getenv("VLLM_CPU_SGL_KERNEL", "0"))),
    # If the env var is set, Ray Compiled Graph uses the specified
@ -847,11 +843,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # a list of plugin names to load, separated by commas.
    # if this is not set, it means all plugins will be loaded
    # if this is set to an empty string, no plugins will be loaded
-    "VLLM_PLUGINS": lambda: (
-        None
-        if "VLLM_PLUGINS" not in os.environ
-        else os.environ["VLLM_PLUGINS"].split(",")
-    ),
+    "VLLM_PLUGINS": lambda: None
+    if "VLLM_PLUGINS" not in os.environ
+    else os.environ["VLLM_PLUGINS"].split(","),
    # a local directory to look in for unrecognized LoRA adapters.
    # only works if plugins are enabled and
    # VLLM_ALLOW_RUNTIME_LORA_UPDATING is enabled.
@ -923,11 +917,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # and performance comparisons. Currently only affects MPLinearKernel
    # selection
    # (kernels: MacheteLinearKernel, MarlinLinearKernel, ExllamaLinearKernel)
-    "VLLM_DISABLED_KERNELS": lambda: (
-        []
-        if "VLLM_DISABLED_KERNELS" not in os.environ
-        else os.environ["VLLM_DISABLED_KERNELS"].split(",")
-    ),
+    "VLLM_DISABLED_KERNELS": lambda: []
+    if "VLLM_DISABLED_KERNELS" not in os.environ
+    else os.environ["VLLM_DISABLED_KERNELS"].split(","),
    # Disable pynccl (using torch.distributed instead)
    "VLLM_DISABLE_PYNCCL": lambda: (
        os.getenv("VLLM_DISABLE_PYNCCL", "False").lower() in ("true", "1")
@ -1163,11 +1155,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    == "1",
    # Gap between padding buckets for the forward pass. E.g., if the gap is
    # 8, we will run forward pass with [16, 24, 32, ...].
-    "VLLM_TPU_BUCKET_PADDING_GAP": lambda: (
-        int(os.environ["VLLM_TPU_BUCKET_PADDING_GAP"])
-        if "VLLM_TPU_BUCKET_PADDING_GAP" in os.environ
-        else 0
-    ),
+    "VLLM_TPU_BUCKET_PADDING_GAP": lambda: int(
+        os.environ["VLLM_TPU_BUCKET_PADDING_GAP"]
+    )
+    if "VLLM_TPU_BUCKET_PADDING_GAP" in os.environ
+    else 0,
    "VLLM_TPU_MOST_MODEL_LEN": lambda: maybe_convert_int(
        os.environ.get("VLLM_TPU_MOST_MODEL_LEN", None)
    ),