From ec3b5ce9ccb4262194a16a8b1c31ffd6b3b824b9 Mon Sep 17 00:00:00 2001
From: Antoni Baum
Date: Fri, 13 Oct 2023 09:59:07 -0700
Subject: [PATCH] Improve detokenization performance (#1338)

---
 vllm/transformers_utils/tokenizer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 6dafdac9..49e7007a 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -81,10 +81,11 @@ def _convert_tokens_to_string_with_added_encoders(
     # even when the loop body is very simple.
     sub_texts = []
     current_sub_text = []
+    all_special_tokens = set(tokenizer.all_special_tokens)
     for token in output_tokens:
-        if skip_special_tokens and token in tokenizer.all_special_tokens:
+        if skip_special_tokens and token in all_special_tokens:
             continue
-        if token in tokenizer.added_tokens_encoder:
+        if token in tokenizer.get_added_vocab():
             if current_sub_text:
                 sub_text = tokenizer.convert_tokens_to_string(current_sub_text)
                 sub_texts.append(sub_text)
@@ -129,7 +130,7 @@ def detokenize_incrementally(
     # The prefix text is necessary only to defeat cleanup algorithms in
     # the decode which decide to add a space or not depending on the
     # surrounding ids.
-    if not getattr(tokenizer, "added_tokens_encoder", {}):
+    if tokenizer.is_fast or not tokenizer.get_added_vocab():
        prefix_text = tokenizer.convert_tokens_to_string(
            output_tokens[prefix_offset:read_offset])
        new_text = tokenizer.convert_tokens_to_string(
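
For reference, below is a minimal standalone sketch (not part of the patch) of the micro-optimization in the first hunk: tokenizer.all_special_tokens is a plain list, so testing membership inside the per-token loop is linear in its length, while hoisting a set() conversion out of the loop makes each check an average O(1) hash lookup. The token values and function names are placeholders, not vLLM code.

    # Rough sketch of the set-membership micro-optimization; placeholder data only.
    import timeit

    special_tokens = [f"<extra_{i}>" for i in range(100)]  # stand-in for all_special_tokens
    output_tokens = ["Hello", "world", "!"] * 10_000       # stand-in for decoded tokens

    def list_lookup() -> int:
        # Old behaviour: each `in` check scans the whole list.
        return sum(tok in special_tokens for tok in output_tokens)

    def set_lookup() -> int:
        # New behaviour: build the set once, then each membership
        # check is a constant-time hash lookup on average.
        special = set(special_tokens)
        return sum(tok in special for tok in output_tokens)

    print("list lookup:", timeit.timeit(list_lookup, number=10))
    print("set lookup: ", timeit.timeit(set_lookup, number=10))

The other change in the patch swaps the private added_tokens_encoder attribute for the public get_added_vocab() API and lets fast (Rust-backed) tokenizers take the plain convert_tokens_to_string path instead of the slower added-encoders path.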