From 48abee9e5492924a69551d859d66d98874d72d60 Mon Sep 17 00:00:00 2001
From: Cherilyn Buren <88433283+NiuBlibing@users.noreply.github.com>
Date: Thu, 8 Aug 2024 14:17:29 +0800
Subject: [PATCH] [Frontend] remove max_num_batched_tokens limit for lora (#7288)

---
 vllm/config.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index ec6d587e..2ac31657 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1377,11 +1377,6 @@ class LoRAConfig:
                            model_config.quantization)
 
     def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
-        if scheduler_config.max_num_batched_tokens > 65528:
-            raise ValueError(
-                "Due to limitations of the custom LoRA CUDA kernel, "
-                "max_num_batched_tokens must be <= 65528 when "
-                "LoRA is enabled.")
         if scheduler_config.chunked_prefill_enabled:
             raise ValueError("LoRA is not supported with chunked prefill yet.")
 
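
Note: with the 65528-token cap removed, enabling LoRA together with a larger max_num_batched_tokens should no longer be rejected at startup (chunked prefill still has to stay disabled, per the check that remains in verify_with_scheduler_config). Below is a minimal sketch of such a configuration through vLLM's offline LLM entrypoint; the model name, adapter path, and token budget are illustrative placeholders, not values taken from this patch.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder base model and LoRA adapter; any compatible pair would do.
llm = LLM(
    model="meta-llama/Llama-2-7b-hf",
    enable_lora=True,
    # Previously rejected above 65528 whenever LoRA was enabled; after this
    # patch it is bounded only by memory and the usual scheduler checks.
    max_num_batched_tokens=81920,
    max_num_seqs=256,
)

outputs = llm.generate(
    ["Explain LoRA in one sentence."],
    SamplingParams(max_tokens=64),
    # LoRARequest(name, int id, local adapter path).
    lora_request=LoRARequest("my-adapter", 1, "/path/to/lora_adapter"),
)
print(outputs[0].outputs[0].text)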