Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-22 20:45:01 +08:00
[Perf] Speed up function _convert_tokens_to_string_with_added_encoders by 13.7x (#20413)
Signed-off-by: Saurabh Misra <misra.saurabh1@gmail.com>
Signed-off-by: Aseem Saxena <aseem.bits@gmail.com>
Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com>
Co-authored-by: Aseem Saxena <aseem.bits@gmail.com>
parent b95697d731
commit bf7c99dfc4
@@ -23,26 +23,31 @@ def _convert_tokens_to_string_with_added_encoders(
     # NOTE(woosuk): The following code is slow because it runs a for loop over
     # the output_tokens. In Python, running a for loop over a list can be slow
     # even when the loop body is very simple.
+    # Performance improvements: avoid repeated attribute and function lookups;
+    # localize frequently used objects;
+
     sub_texts: list[str] = []
     current_sub_text: list[str] = []
-    all_special_tokens = set(tokenizer.all_special_tokens)
+    convert_tokens_to_string = tokenizer.convert_tokens_to_string
+    added_vocab_set = set(tokenizer.get_added_vocab())
+    all_special_tokens = set(
+        tokenizer.all_special_tokens) if skip_special_tokens else ()
+
     for token in output_tokens:
-        if skip_special_tokens and token in all_special_tokens:
+        # Use precomputed set for skip-special check
+        if token in all_special_tokens:
             continue
-        if token in tokenizer.get_added_vocab():
+        if token in added_vocab_set:
             if current_sub_text:
-                sub_text = tokenizer.convert_tokens_to_string(current_sub_text)
-                sub_texts.append(sub_text)
-                current_sub_text = []
+                sub_texts.append(convert_tokens_to_string(current_sub_text))
+                current_sub_text.clear()
             sub_texts.append(token)
         else:
             current_sub_text.append(token)
     if current_sub_text:
-        sub_text = tokenizer.convert_tokens_to_string(current_sub_text)
-        sub_texts.append(sub_text)
+        sub_texts.append(convert_tokens_to_string(current_sub_text))
     if spaces_between_special_tokens:
         return " ".join(sub_texts)
-    else:
-        return "".join(sub_texts)
+    return "".join(sub_texts)
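For readers skimming the hunk: the speedup comes from standard CPython micro-optimizations rather than an algorithmic change. The bound method tokenizer.convert_tokens_to_string is looked up once instead of on every iteration, tokenizer.get_added_vocab() is materialized into a set once so each per-token membership test is a cheap set lookup instead of a method call, and all_special_tokens becomes an empty tuple when skip_special_tokens is False, so the loop no longer needs the "skip_special_tokens and ..." guard (membership in an empty tuple is always False). The sketch below is a minimal, self-contained illustration of that pattern; ToyTokenizer and the sample token list are invented stand-ins for this example, not vLLM or HuggingFace classes, and the measured ratio will vary with the tokenizer and workload (the 13.7x figure in the commit title is from the PR's own benchmark).

import timeit


class ToyTokenizer:
    # Hypothetical stand-in exposing only the attributes the loop touches;
    # not a vLLM or HuggingFace class.
    all_special_tokens = ["<s>", "</s>"]

    def get_added_vocab(self):
        return {"<tool>": 50000, "</tool>": 50001}

    def convert_tokens_to_string(self, tokens):
        return "".join(tokens)


def convert_baseline(tokenizer, output_tokens, skip_special_tokens=True):
    # Shape of the old loop: attribute lookups and get_added_vocab() per token.
    sub_texts, current = [], []
    all_special = set(tokenizer.all_special_tokens)
    for token in output_tokens:
        if skip_special_tokens and token in all_special:
            continue
        if token in tokenizer.get_added_vocab():  # vocab re-queried every token
            if current:
                sub_texts.append(tokenizer.convert_tokens_to_string(current))
                current = []
            sub_texts.append(token)
        else:
            current.append(token)
    if current:
        sub_texts.append(tokenizer.convert_tokens_to_string(current))
    return "".join(sub_texts)


def convert_optimized(tokenizer, output_tokens, skip_special_tokens=True):
    # Shape of the new loop: hoisted bound method, precomputed sets,
    # and an empty tuple so the skip check needs no extra boolean guard.
    sub_texts, current = [], []
    convert = tokenizer.convert_tokens_to_string
    added_vocab = set(tokenizer.get_added_vocab())
    specials = set(tokenizer.all_special_tokens) if skip_special_tokens else ()
    for token in output_tokens:
        if token in specials:  # always False when specials == ()
            continue
        if token in added_vocab:
            if current:
                sub_texts.append(convert(current))
                current.clear()  # reuse the list instead of reallocating
            sub_texts.append(token)
        else:
            current.append(token)
    if current:
        sub_texts.append(convert(current))
    return "".join(sub_texts)


if __name__ == "__main__":
    tok = ToyTokenizer()
    tokens = ["hello", "<tool>", "wor", "ld", "</s>"] * 5000
    assert convert_baseline(tok, tokens) == convert_optimized(tok, tokens)
    for fn in (convert_baseline, convert_optimized):
        secs = timeit.timeit(lambda: fn(tok, tokens), number=20)
        print(f"{fn.__name__}: {secs:.3f}s")

Behavior is preserved because convert_tokens_to_string(current) returns a new string before current.clear() reuses the list, and the empty-tuple trick only changes how the skip-special check is expressed, not its result.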