[BugFix] Fix tokenize asyncio task leak (#24677)
Signed-off-by: Nick Hill <nhill@redhat.com>
commit b971f91504
parent c733bd5e87
@@ -168,7 +168,7 @@ class BaseRenderer(ABC):
         if isinstance(prompt_embeds, list):
             return [_load_and_validate_embed(embed) for embed in prompt_embeds]
         else:
             return [_load_and_validate_embed(prompt_embeds)]
 
 
@@ -182,7 +182,7 @@ class CompletionRenderer(BaseRenderer):
                                             AsyncMicrobatchTokenizer]] = None,
     ):
         super().__init__(model_config, tokenizer)
-        self.async_tokenizer_pool = async_tokenizer_pool or {}
+        self.async_tokenizer_pool = async_tokenizer_pool
         self.async_tokenizer: Optional[AsyncMicrobatchTokenizer] = None
 
     async def render_prompt(
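The `or {}` fallback removed here is subtle: an empty dict is falsy in Python, so `async_tokenizer_pool or {}` replaces not only None but also a caller's still-empty shared pool with a fresh private dict, and anything registered into it afterwards is never shared. Keeping the value as passed lets `_get_async_tokenizer` (further down) handle the missing-pool case explicitly. A standalone illustration of the pitfall (not vLLM code):

shared_pool: dict = {}          # a pool the caller intends to share

def bind_pool(pool=None):
    # "pool or {}" treats an empty dict the same as None, so the caller's
    # still-empty shared pool is silently swapped for a private one.
    return pool or {}

assert bind_pool(shared_pool) is not shared_pool  # an empty dict is falsy
assert bind_pool(None) == {}                      # None also becomes a private dict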
@@ -208,23 +208,21 @@ class CompletionRenderer(BaseRenderer):
         for prompt_input in batch_inputs:
             if prompt_input["is_tokens"] is True:
                 # Token input
-                detokenize_task = asyncio.create_task(
-                    # Note: detokenization is needed when echo is enabled,
-                    # where the input token IDs are decoded back to text.
-                    self._maybe_detokenize(prompt_input["content"],
-                                           config.max_length,
-                                           truncate_prompt_tokens,
-                                           config.cache_salt,
-                                           config.needs_detokenization))
-                tasks.append(detokenize_task)
+                # Note: detokenization is needed when echo is enabled,
+                # where the input token IDs are decoded back to text.
+                task = self._maybe_detokenize(prompt_input["content"],
+                                              config.max_length,
+                                              truncate_prompt_tokens,
+                                              config.cache_salt,
+                                              config.needs_detokenization)
             else:
                 # Text input
-                tokenize_task = asyncio.create_task(
-                    self._tokenize(prompt_input["content"], config.max_length,
-                                   truncate_prompt_tokens,
-                                   config.add_special_tokens,
-                                   config.cache_salt))
-                tasks.append(tokenize_task)
+                task = self._tokenize(prompt_input["content"],
+                                      config.max_length,
+                                      truncate_prompt_tokens,
+                                      config.add_special_tokens,
+                                      config.cache_salt)
+            tasks.append(task)
 
         # Wait for all text tokenization to finish
         if tasks:
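The rewritten loop no longer wraps each coroutine in asyncio.create_task(); it collects the bare coroutines and leaves scheduling to the single "wait for all text tokenization to finish" step. The sketch below shows why that matters. It is a minimal standalone example, not vLLM code: the tokenize helper and the error path are hypothetical, and it assumes the collected awaitables are awaited together afterwards, as the waiting step above implies (e.g. via asyncio.gather).

import asyncio

async def tokenize(text: str) -> list[int]:
    # Hypothetical stand-in for an async tokenization call.
    await asyncio.sleep(0.01)
    return [len(text)]

async def eager_tasks(batch: list[str]) -> list[list[int]]:
    # Old shape: every coroutine becomes a Task the moment it is built. If
    # anything raises before the final await, the already-scheduled tasks are
    # abandoned and loop shutdown logs "Task was destroyed but it is pending!".
    tasks = [asyncio.create_task(tokenize(text)) for text in batch]
    if any(not text for text in batch):
        raise ValueError("empty prompt")  # the tasks above are now orphaned
    return await asyncio.gather(*tasks)

async def lazy_coroutines(batch: list[str]) -> list[list[int]]:
    # New shape: validate first, then hand bare coroutines to gather(), which
    # creates and awaits the tasks at a single point; an early exit leaves
    # nothing running in the background.
    if any(not text for text in batch):
        raise ValueError("empty prompt")  # nothing has been scheduled yet
    return await asyncio.gather(*(tokenize(text) for text in batch))

if __name__ == "__main__":
    print(asyncio.run(lazy_coroutines(["hello", "world"])))

Both shapes return the same results on the happy path; the difference only shows up on early exits and cancellations.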
@@ -356,20 +354,24 @@ class CompletionRenderer(BaseRenderer):
 
     def _get_async_tokenizer(self) -> AsyncMicrobatchTokenizer:
         """Get or create async tokenizer using shared pool."""
-        if self.async_tokenizer is not None:
-            return self.async_tokenizer
+        async_tokenizer = self.async_tokenizer
+        if async_tokenizer is not None:
+            return async_tokenizer
 
+        tokenizer = self.tokenizer
         if self.tokenizer is None:
             raise ValueError(
                 "No tokenizer available for text input processing")
 
-        # Check shared pool first
-        if self.tokenizer in self.async_tokenizer_pool:
-            return self.async_tokenizer_pool[self.tokenizer]
-        # Create new async tokenizer and add to pool
-        self.async_tokenizer = AsyncMicrobatchTokenizer(self.tokenizer)
-        self.async_tokenizer_pool[self.tokenizer] = self.async_tokenizer
-        return self.async_tokenizer
+        if self.async_tokenizer_pool is None:
+            async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+        else:
+            async_tokenizer = self.async_tokenizer_pool.get(tokenizer)
+            if async_tokenizer is None:
+                async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+                self.async_tokenizer_pool[tokenizer] = async_tokenizer
+        self.async_tokenizer = async_tokenizer
+        return async_tokenizer
 
     def _create_tokens_prompt(
         self,
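Combined with the constructor change above, the lookup now distinguishes "no pool supplied" from "pool supplied but missing an entry", and a renderer ends up with exactly one async tokenizer per underlying tokenizer rather than registering fresh ones into a throwaway per-renderer dict. A rough sketch of the same get-or-create shape, using hypothetical Tokenizer and Worker stand-ins (AsyncMicrobatchTokenizer is assumed to own background work, which is why reuse matters):

from typing import Optional

class Tokenizer:
    # Hypothetical stand-in for AnyTokenizer (hashable, so usable as a dict key).
    pass

class Worker:
    # Hypothetical stand-in for AsyncMicrobatchTokenizer; assumed to own
    # background work, so instances should be reused rather than re-created.
    def __init__(self, tokenizer: Tokenizer):
        self.tokenizer = tokenizer

def get_or_create(tokenizer: Tokenizer,
                  pool: Optional[dict[Tokenizer, Worker]]) -> Worker:
    # Mirrors the rewritten _get_async_tokenizer: no pool means a private
    # instance; otherwise reuse the pooled instance, creating and registering
    # it only once per tokenizer.
    if pool is None:
        return Worker(tokenizer)
    worker = pool.get(tokenizer)
    if worker is None:
        worker = Worker(tokenizer)
        pool[tokenizer] = worker
    return worker

pool: dict[Tokenizer, Worker] = {}
tok = Tokenizer()
assert get_or_create(tok, pool) is get_or_create(tok, pool)  # one worker per tokenizer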