diff --git a/vllm/config.py b/vllm/config.py index 6403a53f..92fde449 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -282,6 +282,10 @@ class ModelConfig: raise ValueError( "BitAndBytes quantization with TP or PP is not supported yet.") + if self.quantization == "bitsandbytes" and self.enforce_eager is False: + raise ValueError( + "BitAndBytes with enforce_eager = False is not supported yet.") + def get_hf_config_sliding_window(self) -> Optional[int]: """Get the sliding window size, or None if disabled."""