From 3f770f4427cb926c24af540cc72d1b5901f7f702 Mon Sep 17 00:00:00 2001
From: Isotr0py
Date: Wed, 12 Nov 2025 08:49:29 +0800
Subject: [PATCH] [Performance] Cache loaded custom logitsprocs to avoid overheads (#28462)

Signed-off-by: Isotr0py
---
 vllm/v1/sample/logits_processor/__init__.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/sample/logits_processor/__init__.py b/vllm/v1/sample/logits_processor/__init__.py
index eb537eae6c904..5992c4066c9cb 100644
--- a/vllm/v1/sample/logits_processor/__init__.py
+++ b/vllm/v1/sample/logits_processor/__init__.py
@@ -5,7 +5,7 @@ import inspect
 import itertools
 from abc import abstractmethod
 from collections.abc import Sequence
-from functools import partial
+from functools import lru_cache, partial
 from typing import TYPE_CHECKING
 
 import torch
@@ -216,11 +216,17 @@ def build_logitsprocs(
     )
 
 
+cached_load_custom_logitsprocs = lru_cache(_load_custom_logitsprocs)
+
+
 def validate_logits_processors_parameters(
     logits_processors: Sequence[str | type[LogitsProcessor]] | None,
     sampling_params: SamplingParams,
 ):
-    for logits_procs in _load_custom_logitsprocs(logits_processors):
+    logits_processors = (
+        tuple(logits_processors) if logits_processors is not None else None
+    )
+    for logits_procs in cached_load_custom_logitsprocs(logits_processors):
         logits_procs.validate_params(sampling_params)
 
 