fix: enhance human_readable_int function (#30337)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
commit d02d1043de
parent 979f50efd0
@@ -350,21 +350,35 @@ def test_human_readable_model_len():
     assert args.max_model_len == 1_000_000
     args = parser.parse_args(["--max-model-len", "10k"])
     assert args.max_model_len == 10_000
+    args = parser.parse_args(["--max-model-len", "2g"])
+    assert args.max_model_len == 2_000_000_000
+    args = parser.parse_args(["--max-model-len", "2t"])
+    assert args.max_model_len == 2_000_000_000_000

     # Capital
     args = parser.parse_args(["--max-model-len", "3K"])
-    assert args.max_model_len == 1024 * 3
+    assert args.max_model_len == 2**10 * 3
     args = parser.parse_args(["--max-model-len", "10M"])
     assert args.max_model_len == 2**20 * 10
+    args = parser.parse_args(["--max-model-len", "4G"])
+    assert args.max_model_len == 2**30 * 4
+    args = parser.parse_args(["--max-model-len", "4T"])
+    assert args.max_model_len == 2**40 * 4

     # Decimal values
     args = parser.parse_args(["--max-model-len", "10.2k"])
     assert args.max_model_len == 10200
     # ..truncated to the nearest int
-    args = parser.parse_args(["--max-model-len", "10.212345k"])
+    args = parser.parse_args(["--max-model-len", "10.2123451234567k"])
     assert args.max_model_len == 10212
+    args = parser.parse_args(["--max-model-len", "10.2123451234567m"])
+    assert args.max_model_len == 10212345
+    args = parser.parse_args(["--max-model-len", "10.2123451234567g"])
+    assert args.max_model_len == 10212345123
+    args = parser.parse_args(["--max-model-len", "10.2123451234567t"])
+    assert args.max_model_len == 10212345123456

     # Invalid (do not allow decimals with binary multipliers)
-    for invalid in ["1a", "pwd", "10.24", "1.23M"]:
+    for invalid in ["1a", "pwd", "10.24", "1.23M", "1.22T"]:
         with pytest.raises(ArgumentError):
-            args = parser.parse_args(["--max-model-len", invalid])
+            parser.parse_args(["--max-model-len", invalid])
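The new cases reduce to the two multipliers the commit introduces: lowercase "t" is a decimal multiplier (10**12, decimals allowed and truncated), while uppercase "T" is a binary multiplier (2**40, integers only). A quick check of the arithmetic behind the added assertions, runnable as plain Python outside vLLM:

# Arithmetic behind the new test cases; runnable on its own.
assert 2 * 10**12 == 2_000_000_000_000                        # "2t"
assert 4 * 2**40 == 4_398_046_511_104                         # "4T" -> 2**40 * 4
assert int(10.2123451234567 * 10**12) == 10_212_345_123_456   # "10.2123451234567t"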
@@ -1783,6 +1783,7 @@ class EngineArgs:
         except Exception:
             # This is only used to set default_max_num_batched_tokens
             device_memory = 0
+            device_name = ""

         # NOTE(Kuntai): Setting large `max_num_batched_tokens` for A100 reduces
         # throughput, see PR #17885 for more details.
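The added line completes the fallback branch: when the platform query fails, device_memory is zeroed and device_name now also gets an empty-string default, so a later read of device_name (for example the device-specific tuning hinted at by the NOTE) cannot raise NameError on this path. A minimal sketch of the pattern, with hypothetical probe functions standing in for the real platform calls:

# Sketch of the try/except fallback the hunk above completes.
# probe_memory and probe_name are hypothetical stand-ins for the real
# platform queries; they are not vLLM APIs.
def query_device_defaults(probe_memory, probe_name):
    try:
        device_memory = probe_memory()
        device_name = probe_name().lower()
    except Exception:
        # Both names must be bound here; leaving device_name unset would
        # make any later read of it raise NameError on this code path.
        device_memory = 0
        device_name = ""
    return device_memory, device_name


def failing_probe():
    raise RuntimeError("no accelerator detected")


assert query_device_defaults(failing_probe, failing_probe) == (0, "")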
@@ -2042,11 +2043,13 @@ def human_readable_int(value):
             "k": 10**3,
             "m": 10**6,
             "g": 10**9,
+            "t": 10**12,
         }
         binary_multiplier = {
             "K": 2**10,
             "M": 2**20,
             "G": 2**30,
+            "T": 2**40,
         }

         number, suffix = match.groups()
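For context, the sketch below shows one way a function shaped like this can use the two multiplier tables together with number, suffix = match.groups(). It is illustrative only and not vLLM's exact implementation: the regular expression, error type, and function name are assumptions; only the multiplier tables and the truncation / no-decimals-with-binary-suffixes behaviour are taken from the diff and tests above.

# Illustrative sketch only, not vLLM's exact human_readable_int. The
# multiplier tables and the truncation / "no decimals with binary
# suffixes" rules mirror the diff and tests above; everything else
# (regex, error type, function name) is an assumption.
import re


def human_readable_int_sketch(value: str) -> int:
    match = re.fullmatch(r"(\d+(?:\.\d+)?)([kKmMgGtT])?", value.strip())
    if match is None:
        raise ValueError(f"invalid human-readable integer: {value!r}")

    decimal_multiplier = {"k": 10**3, "m": 10**6, "g": 10**9, "t": 10**12}
    binary_multiplier = {"K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}

    number, suffix = match.groups()
    if suffix is None:
        return int(number)  # plain integer, no suffix
    if suffix in decimal_multiplier:
        # Decimal suffixes accept fractions; the result is truncated.
        return int(float(number) * decimal_multiplier[suffix])
    if "." in number:
        # Binary suffixes reject fractions, e.g. "1.23M" and "1.22T".
        raise ValueError("decimals are not allowed with binary suffixes")
    return int(number) * binary_multiplier[suffix]


assert human_readable_int_sketch("2t") == 2_000_000_000_000
assert human_readable_int_sketch("3K") == 2**10 * 3
assert human_readable_int_sketch("10.2123451234567m") == 10_212_345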