mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 05:45:01 +08:00
[Frontend] [Core] Tensorizer: support dynamic num_readers, update version (#4467)
This commit is contained in:
parent
a494140433
commit
715c2d854d
@ -14,7 +14,7 @@ types-setuptools
|
|||||||
|
|
||||||
# testing
|
# testing
|
||||||
pytest
|
pytest
|
||||||
tensorizer==2.9.0a0
|
tensorizer==2.9.0
|
||||||
pytest-forked
|
pytest-forked
|
||||||
pytest-asyncio
|
pytest-asyncio
|
||||||
pytest-rerunfailures
|
pytest-rerunfailures
|
||||||
|
|||||||
2
setup.py
2
setup.py
@ -408,7 +408,7 @@ setup(
|
|||||||
install_requires=get_requirements(),
|
install_requires=get_requirements(),
|
||||||
ext_modules=ext_modules,
|
ext_modules=ext_modules,
|
||||||
extras_require={
|
extras_require={
|
||||||
"tensorizer": ["tensorizer==2.9.0a1"],
|
"tensorizer": ["tensorizer==2.9.0"],
|
||||||
},
|
},
|
||||||
cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
|
cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
|
||||||
package_data=package_data,
|
package_data=package_data,
|
||||||
|
|||||||
@ -44,7 +44,7 @@ class TensorizerConfig:
|
|||||||
str, bytes, os.PathLike, int]
|
str, bytes, os.PathLike, int]
|
||||||
vllm_tensorized: bool
|
vllm_tensorized: bool
|
||||||
verify_hash: Optional[bool] = False
|
verify_hash: Optional[bool] = False
|
||||||
num_readers: Optional[int] = 1
|
num_readers: Optional[int] = None
|
||||||
encryption_keyfile: Optional[str] = None
|
encryption_keyfile: Optional[str] = None
|
||||||
s3_access_key_id: Optional[str] = None
|
s3_access_key_id: Optional[str] = None
|
||||||
s3_secret_access_key: Optional[str] = None
|
s3_secret_access_key: Optional[str] = None
|
||||||
@ -104,7 +104,7 @@ class TensorizerArgs:
|
|||||||
str, bytes, os.PathLike, int]
|
str, bytes, os.PathLike, int]
|
||||||
vllm_tensorized: bool
|
vllm_tensorized: bool
|
||||||
verify_hash: Optional[bool] = False
|
verify_hash: Optional[bool] = False
|
||||||
num_readers: Optional[int] = 1
|
num_readers: Optional[int] = None
|
||||||
encryption_keyfile: Optional[str] = None
|
encryption_keyfile: Optional[str] = None
|
||||||
s3_access_key_id: Optional[str] = None
|
s3_access_key_id: Optional[str] = None
|
||||||
s3_secret_access_key: Optional[str] = None
|
s3_secret_access_key: Optional[str] = None
|
||||||
@ -125,8 +125,9 @@ class TensorizerArgs:
|
|||||||
the hashes stored in the metadata. A `HashMismatchError` will be
|
the hashes stored in the metadata. A `HashMismatchError` will be
|
||||||
raised if any of the hashes do not match.
|
raised if any of the hashes do not match.
|
||||||
num_readers: Controls how many threads are allowed to read concurrently
|
num_readers: Controls how many threads are allowed to read concurrently
|
||||||
from the source file. Default is 1. This greatly increases
|
from the source file. Default is `None`, which will dynamically set
|
||||||
performance.
|
the number of readers based on the available
|
||||||
|
resources and model size. Concurrent reads greatly increase performance.
|
||||||
encryption_keyfile: File path to a binary file containing a
|
encryption_keyfile: File path to a binary file containing a
|
||||||
binary key to use for decryption. `None` (the default) means
|
binary key to use for decryption. `None` (the default) means
|
||||||
no decryption. See the example script in
|
no decryption. See the example script in
|
||||||
@ -199,10 +200,12 @@ class TensorizerArgs:
|
|||||||
"use for decryption. Can be a file path or S3 network URI.")
|
"use for decryption. Can be a file path or S3 network URI.")
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--num-readers",
|
"--num-readers",
|
||||||
default=1,
|
default=None,
|
||||||
type=int,
|
type=int,
|
||||||
help="Controls how many threads are allowed to read concurrently "
|
help="Controls how many threads are allowed to read concurrently "
|
||||||
"from the source file.")
|
"from the source file. Default is `None`, which will dynamically "
|
||||||
|
"set the number of readers based on the available resources "
|
||||||
|
"and model size. This greatly increases performance.")
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--s3-access-key-id",
|
"--s3-access-key-id",
|
||||||
default=None,
|
default=None,
|
||||||
@ -337,7 +340,7 @@ class TensorizerAgent:
|
|||||||
per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
|
per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
|
||||||
after_mem = get_mem_usage()
|
after_mem = get_mem_usage()
|
||||||
deserializer.close()
|
deserializer.close()
|
||||||
logger.info("Deserialized %s in %0.2fs, %f/s", total_bytes_str,
|
logger.info("Deserialized %s in %0.2fs, %s/s", total_bytes_str,
|
||||||
end - start, per_second)
|
end - start, per_second)
|
||||||
logger.info("Memory usage before: %s", before_mem)
|
logger.info("Memory usage before: %s", before_mem)
|
||||||
logger.info("Memory usage after: %s", after_mem)
|
logger.info("Memory usage after: %s", after_mem)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user