From a049b107e207db796817fb83c4536e0625531d54 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Fri, 9 Aug 2024 04:42:58 +0800 Subject: [PATCH] [Misc] Temporarily resolve the error of BitAndBytes (#7308) --- vllm/config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 2ac31657..63a5acc5 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -322,8 +322,9 @@ class ModelConfig: "BitAndBytes quantization with TP or PP is not supported yet.") if self.quantization == "bitsandbytes" and self.enforce_eager is False: - raise ValueError( - "BitAndBytes with enforce_eager = False is not supported yet.") + logger.warning("CUDA graph is not supported on BitAndBytes yet, " + "fallback to the eager mode.") + self.enforce_eager = True def get_hf_config_sliding_window(self) -> Optional[int]: """Get the sliding window size, or None if disabled."""