From ec3b5ce9ccb4262194a16a8b1c31ffd6b3b824b9 Mon Sep 17 00:00:00 2001
From: Antoni Baum
Date: Fri, 13 Oct 2023 09:59:07 -0700
Subject: [PATCH] Improve detokenization performance (#1338)

---
 vllm/transformers_utils/tokenizer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 6dafdac9..49e7007a 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -81,10 +81,11 @@ def _convert_tokens_to_string_with_added_encoders(
     # even when the loop body is very simple.
     sub_texts = []
     current_sub_text = []
+    all_special_tokens = set(tokenizer.all_special_tokens)
     for token in output_tokens:
-        if skip_special_tokens and token in tokenizer.all_special_tokens:
+        if skip_special_tokens and token in all_special_tokens:
             continue
-        if token in tokenizer.added_tokens_encoder:
+        if token in tokenizer.get_added_vocab():
             if current_sub_text:
                 sub_text = tokenizer.convert_tokens_to_string(current_sub_text)
                 sub_texts.append(sub_text)
@@ -129,7 +130,7 @@ def detokenize_incrementally(
     # The prefix text is necessary only to defeat cleanup algorithms in
     # the decode which decide to add a space or not depending on the
     # surrounding ids.
-    if not getattr(tokenizer, "added_tokens_encoder", {}):
+    if tokenizer.is_fast or not tokenizer.get_added_vocab():
        prefix_text = tokenizer.convert_tokens_to_string(
            output_tokens[prefix_offset:read_offset])
        new_text = tokenizer.convert_tokens_to_string(
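
For reference, below is a minimal standalone sketch (not part of the patch) of the micro-optimization in the first hunk: tokenizer.all_special_tokens is a plain list, so testing membership inside the per-token loop is linear in its length, while hoisting a set() conversion out of the loop makes each check an average O(1) hash lookup. The token values and function names are placeholders, not vLLM code.

    # Rough sketch of the set-membership micro-optimization; placeholder data only.
    import timeit

    special_tokens = [f"<extra_{i}>" for i in range(100)]  # stand-in for all_special_tokens
    output_tokens = ["Hello", "world", "!"] * 10_000       # stand-in for decoded tokens

    def list_lookup() -> int:
        # Old behaviour: each `in` check scans the whole list.
        return sum(tok in special_tokens for tok in output_tokens)

    def set_lookup() -> int:
        # New behaviour: build the set once, then each membership
        # check is a constant-time hash lookup on average.
        special = set(special_tokens)
        return sum(tok in special for tok in output_tokens)

    print("list lookup:", timeit.timeit(list_lookup, number=10))
    print("set lookup: ", timeit.timeit(set_lookup, number=10))

The other change in the patch swaps the private added_tokens_encoder attribute for the public get_added_vocab() API and lets fast (Rust-backed) tokenizers take the plain convert_tokens_to_string path instead of the slower added-encoders path.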