diff --git a/vllm/config.py b/vllm/config.py
index ec6d587e..2ac31657 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1377,11 +1377,6 @@ class LoRAConfig:
                 model_config.quantization)
 
     def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
-        if scheduler_config.max_num_batched_tokens > 65528:
-            raise ValueError(
-                "Due to limitations of the custom LoRA CUDA kernel, "
-                "max_num_batched_tokens must be <= 65528 when "
-                "LoRA is enabled.")
         if scheduler_config.chunked_prefill_enabled:
             raise ValueError("LoRA is not supported with chunked prefill yet.")
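
A minimal usage sketch of the effect of this change (class, method, and attribute names are taken from the hunk above; the constructor arguments shown are assumptions and may differ at this revision): a batch-token budget above the old 65528 custom-kernel cap now passes LoRA validation, while chunked prefill is still rejected.

    # sketch.py -- hypothetical check of the relaxed LoRA validation;
    # constructor arguments are assumed, only the names in the hunk are certain.
    from vllm.config import LoRAConfig, SchedulerConfig

    # A token budget above the old 65528 cap: previously this raised
    # ValueError in verify_with_scheduler_config, after this diff it passes.
    scheduler_config = SchedulerConfig(
        max_num_batched_tokens=131072,  # > 65528
        max_num_seqs=256,               # assumed argument
        max_model_len=131072,           # assumed argument
    )

    lora_config = LoRAConfig(max_lora_rank=16, max_loras=1)  # assumed fields
    lora_config.verify_with_scheduler_config(scheduler_config)  # no error now

    # The remaining check is unchanged: a config with
    # chunked_prefill_enabled == True still raises ValueError here.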