diff --git a/examples/offline_inference/profiling_tpu/README.md b/examples/offline_inference/profiling_tpu/README.md
index e0122c05cff1..8c9c1c92b676 100644
--- a/examples/offline_inference/profiling_tpu/README.md
+++ b/examples/offline_inference/profiling_tpu/README.md
@@ -57,7 +57,10 @@ Once you have collected your profiles with this script, you can visualize them u
 Here are most likely the dependencies you need to install:
 
 ```bash
-pip install tensorflow-cpu tensorboard-plugin-profile etils importlib_resources
+pip install tensorflow-cpu \
+    tensorboard-plugin-profile \
+    etils \
+    importlib_resources
 ```
 
 Then you just need to point TensorBoard to the directory where you saved the profiles and visit `http://localhost:6006/` in your browser:
diff --git a/examples/online_serving/structured_outputs/README.md b/examples/online_serving/structured_outputs/README.md
index c9b97f11eefd..d2777a43d478 100644
--- a/examples/online_serving/structured_outputs/README.md
+++ b/examples/online_serving/structured_outputs/README.md
@@ -13,13 +13,15 @@ vllm serve Qwen/Qwen2.5-3B-Instruct
 To serve a reasoning model, you can use the following command:
 
 ```bash
-vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --reasoning-parser deepseek_r1
+vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
+    --reasoning-parser deepseek_r1
 ```
 
 If you want to run this script standalone with `uv`, you can use the following:
 
 ```bash
-uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/online_serving/structured_outputs structured-output
+uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/online_serving/structured_outputs \
+    structured-output
 ```
 
 See [feature docs](https://docs.vllm.ai/en/latest/features/structured_outputs.html) for more information.
@@ -44,7 +46,9 @@ uv run structured_outputs.py --stream
 Run certain constraints, for example `structural_tag` and `regex`, streaming:
 
 ```bash
-uv run structured_outputs.py --constraint structural_tag regex --stream
+uv run structured_outputs.py \
+    --constraint structural_tag regex \
+    --stream
 ```
 
 Run all constraints, with reasoning models and streaming:
diff --git a/examples/others/tensorize_vllm_model.py b/examples/others/tensorize_vllm_model.py
index 9e1003a5c39d..11233229561b 100644
--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -202,7 +202,7 @@ def parse_args():
 
 
 
-def deserialize():
+def deserialize(args, tensorizer_config):
     if args.lora_path:
         tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
     llm = LLM(model=args.model,
@@ -242,7 +242,7 @@ def deserialize():
     return llm
 
 
-if __name__ == '__main__':
+def main():
     args = parse_args()
 
     s3_access_key_id = (getattr(args, 's3_access_key_id', None)
@@ -260,8 +260,6 @@ if __name__ == '__main__':
 
     model_ref = args.model
 
-    model_name = model_ref.split("/")[1]
-
     if args.command == "serialize" or args.command == "deserialize":
         keyfile = args.keyfile
     else:
@@ -309,6 +307,10 @@ if __name__ == '__main__':
             encryption_keyfile = keyfile,
             **credentials
         )
-        deserialize()
+        deserialize(args, tensorizer_config)
     else:
         raise ValueError("Either serialize or deserialize must be specified.")
+
+
+if __name__ == "__main__":
+    main()
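
Note on the `tensorize_vllm_model.py` hunks: the old `deserialize()` took no parameters yet read `args` and `tensorizer_config`, which only worked because both were module globals created under `if __name__ == '__main__':`. Extracting that block into `main()` makes them function locals, so the patch has to thread them through as arguments. A minimal, self-contained sketch of the same refactor pattern; the `run`/`build_config` names and the argument set are illustrative, not taken from the script:

```python
import argparse


def run(args: argparse.Namespace, config: dict) -> None:
    # All state arrives through parameters, so this function no longer
    # depends on globals that only exist when the module runs as a script.
    print(f"model={args.model} dir={config['tensorizer_dir']}")


def build_config(args: argparse.Namespace) -> dict:
    # Hypothetical stand-in for constructing something like a TensorizerConfig.
    return {"tensorizer_dir": args.path}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="facebook/opt-125m")
    parser.add_argument("--path", default="/tmp/model.tensors")
    args = parser.parse_args()
    config = build_config(args)
    run(args, config)  # explicit plumbing replaces implicit globals


if __name__ == "__main__":
    main()
```

Beyond fixing the hidden global dependency, this shape makes `run` importable and unit-testable without spawning a subprocess, which is presumably the motivation for the `main()` extraction here.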