[Core] Sharded State Loader download from HF (#4889)

2024-05-20 14:46:12 -04:00 · 2024-05-20 14:46:12 -04:00 · 1937e29848
commit 1937e29848
parent f0eecee610
1 changed files with 15 additions and 1 deletions
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@ -423,6 +423,16 @@ class ShardedStateLoader(BaseModelLoader):
                    result[k] = t
        return result
    def _prepare_weights(self, model_name_or_path: str,
                         revision: Optional[str]):
        if os.path.isdir(model_name_or_path):
            return model_name_or_path
        else:
            allow_patterns = ["*.safetensors"]
            return download_weights_from_hf(model_name_or_path,
                                            self.load_config.download_dir,
                                            allow_patterns, revision)
    def load_model(self, *, model_config: ModelConfig,
                   device_config: DeviceConfig,
                   lora_config: Optional[LoRAConfig],
@ -433,6 +443,10 @@ class ShardedStateLoader(BaseModelLoader):
        from safetensors.torch import safe_open
        from vllm.distributed import get_tensor_model_parallel_rank
        local_model_path = self._prepare_weights(model_config.model,
                                                 model_config.revision)
        with set_default_torch_dtype(model_config.dtype):
            with torch.device(device_config.device):
                model = _initialize_model(model_config, self.load_config,
@ -440,7 +454,7 @@ class ShardedStateLoader(BaseModelLoader):
                                          cache_config)
            rank = get_tensor_model_parallel_rank()
            pattern = os.path.join(
-                model_config.model,
+                local_model_path,
                self.pattern.format(rank=rank, part="*"),
            )
            filepaths = glob.glob(pattern)