[Frontend] [Core] Tensorizer: support dynamic num_readers, update version (#4467)

This commit is contained in:
Alpay Ariyak 2024-04-30 19:32:13 -04:00 committed by GitHub
parent a494140433
commit 715c2d854d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 9 deletions

View File

@ -14,7 +14,7 @@ types-setuptools
# testing
pytest
tensorizer==2.9.0a0
tensorizer==2.9.0
pytest-forked
pytest-asyncio
pytest-rerunfailures

View File

@ -408,7 +408,7 @@ setup(
install_requires=get_requirements(),
ext_modules=ext_modules,
extras_require={
"tensorizer": ["tensorizer==2.9.0a1"],
"tensorizer": ["tensorizer==2.9.0"],
},
cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
package_data=package_data,

View File

@ -44,7 +44,7 @@ class TensorizerConfig:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
@ -104,7 +104,7 @@ class TensorizerArgs:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
@ -125,8 +125,9 @@ class TensorizerArgs:
the hashes stored in the metadata. A `HashMismatchError` will be
raised if any of the hashes do not match.
num_readers: Controls how many threads are allowed to read concurrently
from the source file. Default is 1. This greatly increases
performance.
from the source file. Default is `None`, which will dynamically set
the number of readers based on the number of available
resources and model size. This greatly increases performance.
encryption_keyfile: File path to a binary file containing a
binary key to use for decryption. `None` (the default) means
no decryption. See the example script in
@ -199,10 +200,12 @@ class TensorizerArgs:
"use for decryption. Can be a file path or S3 network URI.")
group.add_argument(
"--num-readers",
default=1,
default=None,
type=int,
help="Controls how many threads are allowed to read concurrently "
"from the source file.")
"from the source file. Default is `None`, which will dynamically "
"set the number of readers based on the available resources "
"and model size. This greatly increases performance.")
group.add_argument(
"--s3-access-key-id",
default=None,
@ -337,7 +340,7 @@ class TensorizerAgent:
per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
after_mem = get_mem_usage()
deserializer.close()
logger.info("Deserialized %s in %0.2fs, %f/s", total_bytes_str,
logger.info("Deserialized %s in %0.2fs, %s/s", total_bytes_str,
end - start, per_second)
logger.info("Memory usage before: %s", before_mem)
logger.info("Memory usage after: %s", after_mem)