From b9bcdc715808c2ec110a6e98e98e4fbe0681f8bf Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Mon, 11 Dec 2023 10:32:17 -0800
Subject: [PATCH] Change the load format to pt for Mixtral (#2028)

---
 vllm/config.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index a2739e5f..6bafa73c 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -119,6 +119,16 @@ class ModelConfig:
             # Force ROCm to load from pt weights if nothing specific is set
             if load_format == "auto":
                 load_format = "pt"
+
+        # FIXME(woosuk): Temporary hack. Remove once safetensors is supported.
+        architectures = getattr(self.hf_config, "architectures", [])
+        if "MixtralForCausalLM" in architectures and load_format != "pt":
+            logger.info(
+                "Currently, only 'pt' format is supported for Mixtral. "
+                "Changing the format to 'pt'. This may re-download the "
+                "weights if you have downloaded the safetensor weights.")
+            load_format = "pt"
+
         self.load_format = load_format
 
     def _verify_tokenizer_mode(self) -> None: