From 7a865f2325b948f9b6bc6523b1ab4dfe2aa267a0 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 28 Oct 2025 04:17:45 -0700 Subject: [PATCH] [V0 Deprecation] Remove vestigial V0 logits_processors.py file (#27601) Signed-off-by: Nick Hill --- vllm/entrypoints/openai/logits_processors.py | 92 -------------------- 1 file changed, 92 deletions(-) delete mode 100644 vllm/entrypoints/openai/logits_processors.py diff --git a/vllm/entrypoints/openai/logits_processors.py b/vllm/entrypoints/openai/logits_processors.py deleted file mode 100644 index dedbc23ec83fa..0000000000000 --- a/vllm/entrypoints/openai/logits_processors.py +++ /dev/null @@ -1,92 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from collections.abc import Iterable -from functools import lru_cache, partial - -import torch - -from vllm.sampling_params import LogitsProcessor -from vllm.transformers_utils.tokenizer import AnyTokenizer - - -class AllowedTokenIdsLogitsProcessor: - """Logits processor for constraining generated tokens to a - specific set of token ids.""" - - def __init__(self, allowed_ids: Iterable[int]): - self.allowed_ids: list[int] | None = list(allowed_ids) - self.mask: torch.Tensor | None = None - - def __call__(self, token_ids: list[int], logits: torch.Tensor) -> torch.Tensor: - if self.mask is None: - self.mask = torch.ones( - (logits.shape[-1],), dtype=torch.bool, device=logits.device - ) - self.mask[self.allowed_ids] = False - self.allowed_ids = None - logits.masked_fill_(self.mask, float("-inf")) - return logits - - -@lru_cache(maxsize=32) -def _get_allowed_token_ids_logits_processor( - allowed_token_ids: frozenset[int], - vocab_size: int, -) -> LogitsProcessor: - if not allowed_token_ids: - raise ValueError("Empty allowed_token_ids provided") - if not all(0 <= tid < vocab_size for tid in allowed_token_ids): - raise ValueError("allowed_token_ids contains out-of-vocab token id") - return AllowedTokenIdsLogitsProcessor(allowed_token_ids) - - -def logit_bias_logits_processor( - logit_bias: dict[int, float], - token_ids: list[int], - logits: torch.Tensor, -) -> torch.Tensor: - for token_id, bias in logit_bias.items(): - logits[token_id] += bias - return logits - - -def get_logits_processors( - logit_bias: dict[int, float] | dict[str, float] | None, - allowed_token_ids: list[int] | None, - tokenizer: AnyTokenizer, -) -> list[LogitsProcessor]: - logits_processors: list[LogitsProcessor] = [] - if logit_bias: - try: - # Convert token_id to integer - # Clamp the bias between -100 and 100 per OpenAI API spec - clamped_logit_bias: dict[int, float] = { - int(token_id): min(100.0, max(-100.0, bias)) - for token_id, bias in logit_bias.items() - } - except ValueError as exc: - raise ValueError( - "Found token_id in logit_bias that is not " - "an integer or string representing an integer" - ) from exc - - # Check if token_id is within the vocab size - for token_id, bias in clamped_logit_bias.items(): - if token_id < 0 or token_id >= len(tokenizer): - raise ValueError( - f"token_id {token_id} in logit_bias contains out-of-vocab token id" - ) - - logits_processors.append( - partial(logit_bias_logits_processor, clamped_logit_bias) - ) - - if allowed_token_ids is not None: - logits_processors.append( - _get_allowed_token_ids_logits_processor( - frozenset(allowed_token_ids), len(tokenizer) - ) - ) - - return logits_processors