From bb5494676f5f57f1cf7cf72598de5434a2a22865 Mon Sep 17 00:00:00 2001
From: chenqianfzh <51831990+chenqianfzh@users.noreply.github.com>
Date: Fri, 26 Jul 2024 18:32:20 -0700
Subject: [PATCH] enforce eager mode with bnb quantization temporarily (#6846)

---
 vllm/config.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index 6403a53f..92fde449 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -282,6 +282,10 @@ class ModelConfig:
             raise ValueError(
                 "BitAndBytes quantization with TP or PP is not supported yet.")
 
+        if self.quantization == "bitsandbytes" and self.enforce_eager is False:
+            raise ValueError(
+                "BitAndBytes with enforce_eager = False is not supported yet.")
+
     def get_hf_config_sliding_window(self) -> Optional[int]:
         """Get the sliding window size, or None if disabled."""
 