diff --git a/vllm/envs.py b/vllm/envs.py index e7ab320b4edca..c8263de0dd9ce 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -42,7 +42,6 @@ if TYPE_CHECKING: VLLM_LOGGING_PREFIX: str = "" VLLM_LOGGING_STREAM: str = "ext://sys.stdout" VLLM_LOGGING_CONFIG_PATH: str | None = None - VLLM_LOGITS_PROCESSOR_THREADS: int | None = None VLLM_LOG_STATS_INTERVAL: float = 10.0 VLLM_TRACE_FUNCTION: int = 0 VLLM_ATTENTION_BACKEND: str | None = None @@ -142,7 +141,6 @@ if TYPE_CHECKING: VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict" VLLM_MARLIN_USE_ATOMIC_ADD: bool = False VLLM_MXFP4_USE_MARLIN: bool | None = None - VLLM_V0_USE_OUTLINES_CACHE: bool = False VLLM_V1_USE_OUTLINES_CACHE: bool = False VLLM_TPU_BUCKET_PADDING_GAP: int = 0 VLLM_TPU_MOST_MODEL_LEN: int | None = None @@ -567,15 +565,6 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_LOGGING_STREAM": lambda: os.getenv("VLLM_LOGGING_STREAM", "ext://sys.stdout"), # if set, VLLM_LOGGING_PREFIX will be prepended to all log messages "VLLM_LOGGING_PREFIX": lambda: os.getenv("VLLM_LOGGING_PREFIX", ""), - # if set, vllm will call logits processors in a thread pool with this many - # threads. This is useful when using custom logits processors that either - # (a) launch additional CUDA kernels or (b) do significant CPU-bound work - # while not holding the python GIL, or both. - "VLLM_LOGITS_PROCESSOR_THREADS": lambda: int( - os.getenv("VLLM_LOGITS_PROCESSOR_THREADS", "0") - ) - if "VLLM_LOGITS_PROCESSOR_THREADS" in os.environ - else None, # If set, vllm will log stats at this interval in seconds # If not set, vllm will log stats every 10 seconds. "VLLM_LOG_STATS_INTERVAL": lambda: val @@ -1066,13 +1055,6 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_MXFP4_USE_MARLIN": lambda: maybe_convert_bool( os.environ.get("VLLM_MXFP4_USE_MARLIN", None) ), - # Whether to turn on the outlines cache for V0 - # This cache is unbounded and on disk, so it's not safe to use in - # an environment with potentially malicious users. - "VLLM_V0_USE_OUTLINES_CACHE": lambda: os.environ.get( - "VLLM_V0_USE_OUTLINES_CACHE", "0" - ) - == "1", # Whether to turn on the outlines cache for V1 # This cache is unbounded and on disk, so it's not safe to use in # an environment with potentially malicious users.