vllm/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
Simon Mo 02f0c7b220
[Misc] Add SPDX-FileCopyrightText (#19100)
Signed-off-by: simon-mo <simon.mo@hey.com>
2025-06-03 11:20:17 -07:00

27 lines
801 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse
from transformers import AutoTokenizer
def main(model, cachedir):
# Load the tokenizer and save it to the specified directory
tokenizer = AutoTokenizer.from_pretrained(model)
tokenizer.save_pretrained(cachedir)
print(f"Tokenizer saved to {cachedir}")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Download and save Hugging Face tokenizer"
)
parser.add_argument("--model", type=str, required=True, help="Name of the model")
parser.add_argument(
"--cachedir", type=str, required=True, help="Directory to save the tokenizer"
)
args = parser.parse_args()
main(args.model, args.cachedir)