mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 15:25:01 +08:00
Signed-off-by: Elaine Zhao <elaineyz@amazon.com> Co-authored-by: Tailin Pan <tailinpa@amazon.com> Co-authored-by: Rishabh Rajesh <rishyraj@amazon.com> Co-authored-by: Yishan McNabb <yishanm@amazon.com> Co-authored-by: Patrick Lange <patlange@amazon.com> Co-authored-by: Maxwell Goldberg <mgld@amazon.com> Co-authored-by: Aakash Shetty <sheaak@amazon.com>
63 lines
2.4 KiB
Python
63 lines
2.4 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
from vllm import LLM, SamplingParams
|
|
|
|
|
|
def test_mistral():
    """Check Mistral-7B generations on Neuron against known-good text.

    Six prompts are submitted while the engine is configured with
    ``max_num_seqs=4``, and each prompt asks for a different number of
    tokens.  Every request uses ``top_k=1`` so the generated text can be
    compared for exact equality with the recorded expectations.
    """
    llm = LLM(model="mistralai/Mistral-7B-v0.1",
              tensor_parallel_size=2,
              max_num_seqs=4,
              max_model_len=128,
              use_v2_block_manager=True,
              override_neuron_config={
                  "sequence_parallel_enabled": False,
                  "skip_warmup": True
              },
              device="neuron")

    # Send more prompts than the compiled batch size (4) and request
    # varying generation lengths to test accuracy related to Neuron
    # specific sequence id sorting.
    prompts = [
        "The president of the United States is",
        "The capital of France is",
        "What is Annapurna labs?",
        "I believe the meaning of life is",
        "Tell me a story about a brave knight",
        "Hello, my name is Llama",
    ]

    # One token budget per prompt, in prompt order.
    max_tokens_per_prompt = (10, 20, 30, 40, 50, 60)
    sampling_params = [
        SamplingParams(top_k=1, max_tokens=max_tokens)
        for max_tokens in max_tokens_per_prompt
    ]

    outputs = llm.generate(prompts, sampling_params)

    # Multi-fragment entries are parenthesised so that a missing comma
    # cannot silently merge two expectations into one.
    expected_outputs = [
        " the most powerful person in the world. He is",
        (
            " a city of many faces. It is a city of history, culture, art, "
            "fashion, and"
        ),
        (
            "\n\nAnnapurna Labs is a semiconductor company that was founded "
            "in 2013 by Amazon. The company is"
        ),
        (
            " to be happy.\n\nI believe that happiness is a choice.\n\nI "
            "believe that happiness is a state of mind.\n\nI believe that "
            "happiness is a journey.\n\nI believe"
        ),
        (
            " who rescued a princess from a dragon.\n\nTell me a story about"
            " a princess who rescued herself from a dragon.\n\nTell me a "
            "story about a princess who rescued herself from a dragon and "
            "then rescued a knight from"
        ),
        (
            " and I am a 10 year old male. I am a very friendly and "
            "affectionate boy who loves to be around people. I am a very "
            "active boy who loves to play and run around. I am a very smart "
            "boy who loves to learn new things. I am a very loyal boy"
        ),
    ]

    # zip() stops at the shorter iterable, so a missing result would go
    # unnoticed below; check the count explicitly first.
    assert len(outputs) == len(expected_outputs), (
        f"expected {len(expected_outputs)} outputs, got {len(outputs)}")

    for index, (expected_output, output) in enumerate(
            zip(expected_outputs, outputs)):
        generated_text = output.outputs[0].text
        assert expected_output == generated_text, (
            f"prompt {index} ({prompts[index]!r}): "
            f"expected {expected_output!r}, got {generated_text!r}")
|