From b9bcdc715808c2ec110a6e98e98e4fbe0681f8bf Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 11 Dec 2023 10:32:17 -0800 Subject: [PATCH] Change the load format to pt for Mixtral (#2028) --- vllm/config.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/config.py b/vllm/config.py index a2739e5f..6bafa73c 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -119,6 +119,16 @@ class ModelConfig: # Force ROCm to load from pt weights if nothing specific is set if load_format == "auto": load_format = "pt" + + # FIXME(woosuk): This is a temporary hack. Support safetensor weights. + architectures = getattr(self.hf_config, "architectures", []) + if "MixtralForCausalLM" in architectures and load_format != "pt": + logger.info( + "Currently, only 'pt' format is supported for Mixtral. " + "Changing the format to 'pt'. This may re-download the " + "weights if you have downloaded the safetensor weights.") + load_format = "pt" + self.load_format = load_format def _verify_tokenizer_mode(self) -> None: