mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-10 12:24:41 +08:00
[CI] Add Qwen3-Next-FP8 to Blackwell model tests (#31049)
Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
This commit is contained in:
parent
bfa2c0bbb9
commit
bc0a5a0c08
11
tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2.yaml
Normal file
11
tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2.yaml
Normal file
@ -0,0 +1,11 @@
|
||||
model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
|
||||
accuracy_threshold: 0.85
|
||||
num_questions: 1319
|
||||
num_fewshot: 5
|
||||
server_args: >-
|
||||
--max-model-len 4096
|
||||
--tensor-parallel-size 2
|
||||
--enable-expert-parallel
|
||||
--async-scheduling
|
||||
env:
|
||||
VLLM_USE_FLASHINFER_MOE_FP8: "1"
|
||||
@ -4,3 +4,4 @@ Qwen1.5-MoE-W4A16-CT.yaml
|
||||
DeepSeek-V2-Lite-Instruct-FP8.yaml
|
||||
Qwen3-30B-A3B-NVFP4.yaml
|
||||
Qwen3-Next-80B-A3B-NVFP4-EP2.yaml
|
||||
Qwen3-Next-FP8-EP2.yaml
|
||||
|
||||
@ -71,6 +71,7 @@ def test_gsm8k_correctness(config_filename):
|
||||
print(f"Number of questions: {eval_config['num_questions']}")
|
||||
print(f"Number of few-shot examples: {eval_config['num_fewshot']}")
|
||||
print(f"Server args: {' '.join(server_args)}")
|
||||
print(f"Environment variables: {env_dict}")
|
||||
|
||||
# Launch server and run evaluation
|
||||
with RemoteOpenAIServer(
|
||||
|
||||
@ -106,6 +106,7 @@ class RemoteOpenAIServer:
|
||||
env.update(env_dict)
|
||||
serve_cmd = ["vllm", "serve", model, *vllm_serve_args]
|
||||
print(f"Launching RemoteOpenAIServer with: {' '.join(serve_cmd)}")
|
||||
print(f"Environment variables: {env}")
|
||||
self.proc: subprocess.Popen = subprocess.Popen(
|
||||
serve_cmd,
|
||||
env=env,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user