From a049b107e207db796817fb83c4536e0625531d54 Mon Sep 17 00:00:00 2001
From: Jee Jee Li <pandaleefree@gmail.com>
Date: Fri, 9 Aug 2024 04:42:58 +0800
Subject: [PATCH] [Misc] Temporarily resolve the error of BitAndBytes (#7308)

---
 vllm/config.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 2ac31657..63a5acc5 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -322,8 +322,9 @@ class ModelConfig:
                 "BitAndBytes quantization with TP or PP is not supported yet.")
 
         if self.quantization == "bitsandbytes" and self.enforce_eager is False:
-            raise ValueError(
-                "BitAndBytes with enforce_eager = False is not supported yet.")
+            logger.warning("CUDA graph is not supported on BitAndBytes yet, "
+                           "fallback to the eager mode.")
+            self.enforce_eager = True
 
     def get_hf_config_sliding_window(self) -> Optional[int]:
         """Get the sliding window size, or None if disabled."""