mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-11 15:55:43 +08:00
[Bugfix] Read truncate_prompt_tokens from pooling_params in AsyncLLM.encode() (#31013)
Signed-off-by: Jeffrey Wang <jeffreywang@anyscale.com>
This commit is contained in:
parent
ff2168bca3
commit
1501a4070e
@ -71,7 +71,11 @@ class EngineClient(ABC):
|
|||||||
truncate_prompt_tokens: int | None = None,
|
truncate_prompt_tokens: int | None = None,
|
||||||
tokenization_kwargs: dict[str, Any] | None = None,
|
tokenization_kwargs: dict[str, Any] | None = None,
|
||||||
) -> AsyncGenerator[PoolingRequestOutput, None]:
|
) -> AsyncGenerator[PoolingRequestOutput, None]:
|
||||||
"""Generate outputs for a request from a pooling model."""
|
"""Generate outputs for a request from a pooling model.
|
||||||
|
|
||||||
|
NOTE: truncate_prompt_tokens is deprecated in v0.14.
|
||||||
|
TODO: Remove this argument in v0.15.
|
||||||
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import asyncio
|
|||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
@ -627,6 +628,9 @@ class AsyncLLM(EngineClient):
|
|||||||
|
|
||||||
The caller of generate() iterates the returned AsyncGenerator,
|
The caller of generate() iterates the returned AsyncGenerator,
|
||||||
returning the RequestOutput back to the caller.
|
returning the RequestOutput back to the caller.
|
||||||
|
|
||||||
|
NOTE: truncate_prompt_tokens is deprecated in v0.14.
|
||||||
|
TODO: Remove truncate_prompt_tokens in v0.15.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -641,9 +645,19 @@ class AsyncLLM(EngineClient):
|
|||||||
|
|
||||||
if tokenization_kwargs is None:
|
if tokenization_kwargs is None:
|
||||||
tokenization_kwargs = {}
|
tokenization_kwargs = {}
|
||||||
|
|
||||||
|
if truncate_prompt_tokens is not None:
|
||||||
|
warnings.warn(
|
||||||
|
"The `truncate_prompt_tokens` parameter in `AsyncLLM.encode()` "
|
||||||
|
"is deprecated and will be removed in v0.15. "
|
||||||
|
"Please use `pooling_params.truncate_prompt_tokens` instead.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
_validate_truncation_size(
|
_validate_truncation_size(
|
||||||
self.model_config.max_model_len,
|
self.model_config.max_model_len,
|
||||||
truncate_prompt_tokens,
|
pooling_params.truncate_prompt_tokens,
|
||||||
tokenization_kwargs,
|
tokenization_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user