diff --git a/vllm/model_executor/model_loader.py b/vllm/model_executor/model_loader.py
index 951ba1f0..82c8dfa7 100644
--- a/vllm/model_executor/model_loader.py
+++ b/vllm/model_executor/model_loader.py
@@ -35,6 +35,7 @@ _MODEL_REGISTRY = {
 # FIXME(woosuk): Remove this once all models support quantization.
 _MODEL_CLASSES_SUPPORT_QUANTIZATION = [
     LlamaForCausalLM,
+    MistralForCausalLM,
 ]