[Bugfix] store lock file in tmp directory (#3578) (#3599)

Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
Woosuk Kwon 2024-03-24 20:06:50 -07:00 committed by GitHub
parent 6d93d35308
commit 56a8652f33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,6 +1,7 @@
"""Utilities for downloading and initializing model weights.""" """Utilities for downloading and initializing model weights."""
import filelock import filelock
import glob import glob
import hashlib
import fnmatch import fnmatch
import json import json
import os import os
@@ -20,8 +21,12 @@ from vllm.model_executor.layers.quantization import (get_quantization_config,
logger = init_logger(__name__) logger = init_logger(__name__)
_xdg_cache_home = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) # use system-level temp directory for file locks, so that multiple users
_vllm_filelocks_path = os.path.join(_xdg_cache_home, 'vllm/locks/') # can share the same lock without error.
# lock files in the temp directory will be automatically deleted when the
# system reboots, so users will not complain about annoying lock files
temp_dir = os.environ.get('TMPDIR') or os.environ.get(
'TEMP') or os.environ.get('TMP') or "/tmp/"
class Disabledtqdm(tqdm): class Disabledtqdm(tqdm):
@@ -31,10 +36,15 @@ class Disabledtqdm(tqdm):
def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
lock_dir = cache_dir if cache_dir is not None else _vllm_filelocks_path lock_dir = cache_dir or temp_dir
os.makedirs(os.path.dirname(lock_dir), exist_ok=True) os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
lock_file_name = model_name_or_path.replace("/", "-") + ".lock" model_name = model_name_or_path.replace("/", "-")
lock = filelock.SoftFileLock(os.path.join(lock_dir, lock_file_name)) hash_name = hashlib.sha256(model_name.encode()).hexdigest()
# add hash to avoid conflict with old users' lock files
lock_file_name = hash_name + model_name + ".lock"
# mode 0o666 is required for the filelock to be shared across users
lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name),
mode=0o666)
return lock return lock