mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 01:45:02 +08:00
[Performance] Cache loaded custom logitsprocs to avoid overheads (#28462)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
48c879369f
commit
3f770f4427
@@ -5,7 +5,7 @@ import inspect
|
|||||||
import itertools
|
import itertools
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from functools import partial
|
from functools import lru_cache, partial
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@@ -216,11 +216,17 @@ def build_logitsprocs(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
cached_load_custom_logitsprocs = lru_cache(_load_custom_logitsprocs)
|
||||||
|
|
||||||
|
|
||||||
def validate_logits_processors_parameters(
|
def validate_logits_processors_parameters(
|
||||||
logits_processors: Sequence[str | type[LogitsProcessor]] | None,
|
logits_processors: Sequence[str | type[LogitsProcessor]] | None,
|
||||||
sampling_params: SamplingParams,
|
sampling_params: SamplingParams,
|
||||||
):
|
):
|
||||||
for logits_procs in _load_custom_logitsprocs(logits_processors):
|
logits_processors = (
|
||||||
|
tuple(logits_processors) if logits_processors is not None else None
|
||||||
|
)
|
||||||
|
for logits_procs in cached_load_custom_logitsprocs(logits_processors):
|
||||||
logits_procs.validate_params(sampling_params)
|
logits_procs.validate_params(sampling_params)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user