From 203d4f82ac137f04c0e487bbdbba86b3cf3ae7bf Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Fri, 29 Mar 2024 18:46:39 -0700
Subject: [PATCH] [Core][Bugfix] cache len of tokenizer (#3741)

---
 vllm/transformers_utils/tokenizer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index dad20a56..3bda3f41 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -26,6 +26,7 @@ def get_cached_tokenizer(
     tokenizer_all_special_tokens_extended = (
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
+    tokenizer_len = len(tokenizer)
 
     class CachedTokenizer(tokenizer.__class__):
 
@@ -41,6 +42,9 @@ def get_cached_tokenizer(
         def all_special_tokens_extended(self):
             return tokenizer_all_special_tokens_extended
 
+        def __len__(self):
+            return tokenizer_len
+
     CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
 
     tokenizer.__class__ = CachedTokenizer