From 6d1479ca4b5a3904b6c5b4a1d741dda43efdc289 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Fri, 2 May 2025 20:24:45 +0800
Subject: [PATCH] [doc] add the print result (#17584)

Signed-off-by: reidliu41
Co-authored-by: reidliu41
---
 docs/source/features/quantization/fp8.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/source/features/quantization/fp8.md b/docs/source/features/quantization/fp8.md
index b90bb49ef87ea..f87b2a02cd447 100644
--- a/docs/source/features/quantization/fp8.md
+++ b/docs/source/features/quantization/fp8.md
@@ -30,6 +30,7 @@ from vllm import LLM
 model = LLM("facebook/opt-125m", quantization="fp8")
 # INFO 06-10 17:55:42 model_runner.py:157] Loading model weights took 0.1550 GB
 result = model.generate("Hello, my name is")
+print(result[0].outputs[0].text)
 ```
 
 :::{warning}
@@ -106,6 +107,7 @@ Load and run the model in `vllm`:
 from vllm import LLM
 model = LLM("./Meta-Llama-3-8B-Instruct-FP8-Dynamic")
-model.generate("Hello my name is")
+result = model.generate("Hello my name is")
+print(result[0].outputs[0].text)
 ```
 
 Evaluate accuracy with `lm_eval` (for example on 250 samples of `gsm8k`):
@@ -188,4 +190,5 @@ from vllm import LLM
 model = LLM(model="Meta-Llama-3-8B-Instruct-FP8/")
 # INFO 06-10 21:15:41 model_runner.py:159] Loading model weights took 8.4596 GB
 result = model.generate("Hello, my name is")
+print(result[0].outputs[0].text)
 ```