[Bugfix] Add file lock for ModelScope download (#14060)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li 2025-03-01 14:10:28 +08:00 committed by GitHub
parent f64ffa8c25
commit 6a84164add
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 40 additions and 22 deletions

View File

@ -14,6 +14,8 @@ from tqdm.asyncio import tqdm
from transformers import (AutoTokenizer, PreTrainedTokenizer, from transformers import (AutoTokenizer, PreTrainedTokenizer,
PreTrainedTokenizerFast) PreTrainedTokenizerFast)
from vllm.model_executor.model_loader.weight_utils import get_lock
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60) AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
@ -430,6 +432,9 @@ def get_model(pretrained_model_name_or_path: str) -> str:
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
from modelscope import snapshot_download from modelscope import snapshot_download
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with get_lock(pretrained_model_name_or_path):
model_path = snapshot_download( model_path = snapshot_download(
model_id=pretrained_model_name_or_path, model_id=pretrained_model_name_or_path,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE, local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,

View File

@ -49,7 +49,7 @@ from vllm.model_executor.model_loader.utils import (ParamMapping,
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
download_safetensors_index_file_from_hf, download_weights_from_hf, download_safetensors_index_file_from_hf, download_weights_from_hf,
filter_duplicate_safetensors_files, filter_files_not_needed_for_inference, filter_duplicate_safetensors_files, filter_files_not_needed_for_inference,
get_gguf_extra_tensor_names, gguf_quant_weights_iterator, get_gguf_extra_tensor_names, get_lock, gguf_quant_weights_iterator,
initialize_dummy_weights, np_cache_weights_iterator, pt_weights_iterator, initialize_dummy_weights, np_cache_weights_iterator, pt_weights_iterator,
runai_safetensors_weights_iterator, safetensors_weights_iterator) runai_safetensors_weights_iterator, safetensors_weights_iterator)
from vllm.model_executor.utils import set_weight_attrs from vllm.model_executor.utils import set_weight_attrs
@ -235,10 +235,14 @@ class DefaultModelLoader(BaseModelLoader):
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
if not os.path.exists(model): if not os.path.exists(model):
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with get_lock(model, self.load_config.download_dir):
model_path = snapshot_download( model_path = snapshot_download(
model_id=model, model_id=model,
cache_dir=self.load_config.download_dir, cache_dir=self.load_config.download_dir,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE, local_files_only=huggingface_hub.constants.
HF_HUB_OFFLINE,
revision=revision, revision=revision,
ignore_file_pattern=self.load_config.ignore_patterns, ignore_file_pattern=self.load_config.ignore_patterns,
) )

View File

@ -8,6 +8,7 @@ import os
import tempfile import tempfile
import time import time
from collections import defaultdict from collections import defaultdict
from pathlib import Path
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
import filelock import filelock
@ -67,8 +68,10 @@ class DisabledTqdm(tqdm):
super().__init__(*args, **kwargs, disable=True) super().__init__(*args, **kwargs, disable=True)
def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): def get_lock(model_name_or_path: Union[str, Path],
cache_dir: Optional[str] = None):
lock_dir = cache_dir or temp_dir lock_dir = cache_dir or temp_dir
model_name_or_path = str(model_name_or_path)
os.makedirs(os.path.dirname(lock_dir), exist_ok=True) os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
model_name = model_name_or_path.replace("/", "-") model_name = model_name_or_path.replace("/", "-")
hash_name = hashlib.sha256(model_name.encode()).hexdigest() hash_name = hashlib.sha256(model_name.encode()).hexdigest()

View File

@ -150,8 +150,14 @@ def get_tokenizer(
# pylint: disable=C. # pylint: disable=C.
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
# Avoid circular import
from vllm.model_executor.model_loader.weight_utils import get_lock
# Only set the tokenizer here, model will be downloaded on the workers. # Only set the tokenizer here, model will be downloaded on the workers.
if not os.path.exists(tokenizer_name): if not os.path.exists(tokenizer_name):
# Use file lock to prevent multiple processes from
# downloading the same file at the same time.
with get_lock(tokenizer_name, download_dir):
tokenizer_path = snapshot_download( tokenizer_path = snapshot_download(
model_id=tokenizer_name, model_id=tokenizer_name,
cache_dir=download_dir, cache_dir=download_dir,