mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-03 04:04:32 +08:00
Make pickle import check fast (#25379)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
ddf4e1f56f
commit
104e62fbc8
@@ -155,11 +155,10 @@ repos:
|
|||||||
additional_dependencies: [regex]
|
additional_dependencies: [regex]
|
||||||
- id: check-pickle-imports
|
- id: check-pickle-imports
|
||||||
name: Prevent new pickle/cloudpickle imports
|
name: Prevent new pickle/cloudpickle imports
|
||||||
entry: python tools/check_pickle_imports.py
|
entry: python tools/pre_commit/check_pickle_imports.py
|
||||||
language: python
|
language: python
|
||||||
types: [python]
|
types: [python]
|
||||||
pass_filenames: false
|
additional_dependencies: [regex]
|
||||||
additional_dependencies: [pathspec, regex]
|
|
||||||
- id: validate-config
|
- id: validate-config
|
||||||
name: Validate configuration has default values and that each field has a docstring
|
name: Validate configuration has default values and that each field has a docstring
|
||||||
entry: python tools/validate_config.py
|
entry: python tools/validate_config.py
|
||||||
|
|||||||
@@ -1,20 +1,10 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
try:
|
|
||||||
import pathspec
|
|
||||||
except ImportError:
|
|
||||||
print(
|
|
||||||
"ERROR: The 'pathspec' library is required. "
|
|
||||||
"Install it with 'pip install pathspec'.",
|
|
||||||
file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
# List of files (relative to repo root) that are allowed to import pickle or
|
# List of files (relative to repo root) that are allowed to import pickle or
|
||||||
# cloudpickle
|
# cloudpickle
|
||||||
#
|
#
|
||||||
@@ -25,7 +15,7 @@ except ImportError:
|
|||||||
# Before adding new uses of pickle/cloudpickle, please consider safer
|
# Before adding new uses of pickle/cloudpickle, please consider safer
|
||||||
# alternatives like msgpack or pydantic that are already in use in vLLM. Only
|
# alternatives like msgpack or pydantic that are already in use in vLLM. Only
|
||||||
# add to this list if absolutely necessary and after careful security review.
|
# add to this list if absolutely necessary and after careful security review.
|
||||||
ALLOWED_FILES = set([
|
ALLOWED_FILES = {
|
||||||
# pickle
|
# pickle
|
||||||
'vllm/v1/serial_utils.py',
|
'vllm/v1/serial_utils.py',
|
||||||
'vllm/v1/executor/multiproc_executor.py',
|
'vllm/v1/executor/multiproc_executor.py',
|
||||||
@@ -36,11 +26,9 @@ ALLOWED_FILES = set([
|
|||||||
'tests/tokenization/test_cached_tokenizer.py',
|
'tests/tokenization/test_cached_tokenizer.py',
|
||||||
'vllm/distributed/utils.py',
|
'vllm/distributed/utils.py',
|
||||||
'vllm/distributed/parallel_state.py',
|
'vllm/distributed/parallel_state.py',
|
||||||
'vllm/engine/multiprocessing/client.py',
|
|
||||||
'vllm/distributed/device_communicators/all_reduce_utils.py',
|
'vllm/distributed/device_communicators/all_reduce_utils.py',
|
||||||
'vllm/distributed/device_communicators/shm_broadcast.py',
|
'vllm/distributed/device_communicators/shm_broadcast.py',
|
||||||
'vllm/distributed/device_communicators/shm_object_storage.py',
|
'vllm/distributed/device_communicators/shm_object_storage.py',
|
||||||
'vllm/engine/multiprocessing/engine.py',
|
|
||||||
'benchmarks/kernels/graph_machete_bench.py',
|
'benchmarks/kernels/graph_machete_bench.py',
|
||||||
'benchmarks/kernels/benchmark_lora.py',
|
'benchmarks/kernels/benchmark_lora.py',
|
||||||
'benchmarks/kernels/benchmark_machete.py',
|
'benchmarks/kernels/benchmark_machete.py',
|
||||||
@@ -55,65 +43,30 @@ ALLOWED_FILES = set([
|
|||||||
'tests/utils.py',
|
'tests/utils.py',
|
||||||
# pickle and cloudpickle
|
# pickle and cloudpickle
|
||||||
'vllm/utils/__init__.py',
|
'vllm/utils/__init__.py',
|
||||||
'vllm/v1/serial_utils.py',
|
}
|
||||||
'vllm/v1/executor/multiproc_executor.py',
|
|
||||||
'vllm/transformers_utils/config.py',
|
|
||||||
'vllm/model_executor/models/registry.py',
|
|
||||||
'vllm/engine/multiprocessing/client.py',
|
|
||||||
'vllm/engine/multiprocessing/engine.py',
|
|
||||||
])
|
|
||||||
|
|
||||||
PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)"
|
PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)"
|
||||||
r"|from\s+(pickle|cloudpickle)\s+import\b)")
|
r"|from\s+(pickle|cloudpickle)\s+import\b)")
|
||||||
|
|
||||||
|
|
||||||
def is_python_file(path):
|
def scan_file(path: str) -> int:
|
||||||
return path.endswith('.py')
|
|
||||||
|
|
||||||
|
|
||||||
def scan_file(path):
|
|
||||||
with open(path, encoding='utf-8') as f:
|
with open(path, encoding='utf-8') as f:
|
||||||
for line in f:
|
for i, line in enumerate(f, 1):
|
||||||
if PICKLE_RE.match(line):
|
if PICKLE_RE.match(line):
|
||||||
return True
|
print(f"{path}:{i}: "
|
||||||
return False
|
"\033[91merror:\033[0m " # red color
|
||||||
|
"Found pickle/cloudpickle import")
|
||||||
|
return 1
|
||||||
def load_gitignore(repo_root):
|
return 0
|
||||||
gitignore_path = os.path.join(repo_root, '.gitignore')
|
|
||||||
patterns = []
|
|
||||||
if os.path.exists(gitignore_path):
|
|
||||||
with open(gitignore_path, encoding='utf-8') as f:
|
|
||||||
patterns = f.read().splitlines()
|
|
||||||
# Always ignore .git directory
|
|
||||||
patterns.append('.git/')
|
|
||||||
return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
returncode = 0
|
||||||
spec = load_gitignore(repo_root)
|
for filename in sys.argv[1:]:
|
||||||
bad_files = []
|
if filename in ALLOWED_FILES:
|
||||||
for dirpath, _, filenames in os.walk(repo_root):
|
continue
|
||||||
for filename in filenames:
|
returncode |= scan_file(filename)
|
||||||
if not is_python_file(filename):
|
return returncode
|
||||||
continue
|
|
||||||
abs_path = os.path.join(dirpath, filename)
|
|
||||||
rel_path = os.path.relpath(abs_path, repo_root)
|
|
||||||
# Skip ignored files
|
|
||||||
if spec.match_file(rel_path):
|
|
||||||
continue
|
|
||||||
if scan_file(abs_path) and rel_path not in ALLOWED_FILES:
|
|
||||||
bad_files.append(rel_path)
|
|
||||||
if bad_files:
|
|
||||||
print("\nERROR: The following files import 'pickle' or 'cloudpickle' "
|
|
||||||
"but are not in the allowed list:")
|
|
||||||
for f in bad_files:
|
|
||||||
print(f" {f}")
|
|
||||||
print("\nIf this is intentional, update the allowed list in "
|
|
||||||
"tools/check_pickle_imports.py.")
|
|
||||||
sys.exit(1)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_regex():
|
def test_regex():
|
||||||
@@ -149,4 +102,4 @@ if __name__ == '__main__':
|
|||||||
if '--test-regex' in sys.argv:
|
if '--test-regex' in sys.argv:
|
||||||
test_regex()
|
test_regex()
|
||||||
else:
|
else:
|
||||||
main()
|
sys.exit(main())
|
||||||
Loading…
x
Reference in New Issue
Block a user