Improve CPU tests for 0.12.0

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
This commit is contained in:
Tsai, Louie 2025-12-18 16:22:30 -08:00
parent 09dc7c690c
commit b735255f17

View File

@ -19,10 +19,8 @@
"block_size": 128,
"trust_remote_code": "",
"disable_log_stats": "",
"enforce_eager": "",
"max_num_batched_tokens": 2048,
"max_num_seqs": 256,
"load_format": "dummy"
"max_num_seqs": 256
},
"client_parameters": {
"model": "meta-llama/Llama-3.1-8B-Instruct",
@ -35,7 +33,8 @@
{
"test_name": "serving_llama8B_tp1_sharegpt",
"server_parameters": {
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "sharegpt",
@ -45,7 +44,8 @@
{
"test_name": "serving_llama8B_tp2_sharegpt",
"server_parameters": {
"tensor_parallel_size": 2
"tensor_parallel_size": 2,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "sharegpt",
@ -55,7 +55,8 @@
{
"test_name": "serving_llama8B_tp1_random_128_128",
"server_parameters": {
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -66,7 +67,8 @@
{
"test_name": "serving_llama8B_tp2_random_128_128",
"server_parameters": {
"tensor_parallel_size": 2
"tensor_parallel_size": 2,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -77,7 +79,8 @@
{
"test_name": "serving_llama8B_tp4_random_128_128",
"server_parameters": {
"tensor_parallel_size": 4
"tensor_parallel_size": 4,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -88,7 +91,8 @@
{
"test_name": "serving_llama8B_tp1_random_128_2048",
"server_parameters": {
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -99,7 +103,8 @@
{
"test_name": "serving_llama8B_tp2_random_128_2048",
"server_parameters": {
"tensor_parallel_size": 2
"tensor_parallel_size": 2,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -110,7 +115,8 @@
{
"test_name": "serving_llama8B_tp4_random_128_2048",
"server_parameters": {
"tensor_parallel_size": 4
"tensor_parallel_size": 4,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -121,7 +127,8 @@
{
"test_name": "serving_llama8B_tp1_random_2048_128",
"server_parameters": {
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -132,7 +139,8 @@
{
"test_name": "serving_llama8B_tp2_random_2048_128",
"server_parameters": {
"tensor_parallel_size": 2
"tensor_parallel_size": 2,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -143,7 +151,8 @@
{
"test_name": "serving_llama8B_tp4_random_2048_128",
"server_parameters": {
"tensor_parallel_size": 4
"tensor_parallel_size": 4,
"enforce_eager": ""
},
"client_parameters": {
"dataset_name": "random",
@ -151,11 +160,51 @@
"random-output-len": 128
}
},
{
"test_name": "serving_llama8B_int4_tp1_random_128_128",
"server_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"tensor_parallel_size": 1
},
"client_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"dataset_name": "random",
"random-input-len": 128,
"random-output-len": 128
}
},
{
"test_name": "serving_llama8B_int4_tp2_random_128_128",
"server_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"tensor_parallel_size": 2
},
"client_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"dataset_name": "random",
"random-input-len": 128,
"random-output-len": 128
}
},
{
"test_name": "serving_llama8B_int4_tp4_random_128_128",
"server_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"tensor_parallel_size": 4
},
"client_parameters": {
"model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
"dataset_name": "random",
"random-input-len": 128,
"random-output-len": 128
}
},
{
"test_name": "serving_llama3B_tp1_random_128_128",
"server_parameters": {
"model": "meta-llama/Llama-3.2-3B-Instruct",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "meta-llama/Llama-3.2-3B-Instruct",
@ -168,7 +217,8 @@
"test_name": "serving_granite2B_tp1_random_128_128",
"server_parameters": {
"model": "ibm-granite/granite-3.2-2b-instruct",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "ibm-granite/granite-3.2-2b-instruct",
@ -181,7 +231,8 @@
"test_name": "serving_qwen1.7B_tp1_random_128_128",
"server_parameters": {
"model": "Qwen/Qwen3-1.7B",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "Qwen/Qwen3-1.7B",
@ -194,7 +245,8 @@
"test_name": "serving_qwen4B_tp1_random_128_128",
"server_parameters": {
"model": "Qwen/Qwen3-4B",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "Qwen/Qwen3-4B",
@ -207,7 +259,8 @@
"test_name": "serving_qwen8B_tp1_random_128_128",
"server_parameters": {
"model": "Qwen/Qwen3-8B",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "Qwen/Qwen3-8B",
@ -220,7 +273,8 @@
"test_name": "serving_glm9B_tp1_random_128_128",
"server_parameters": {
"model": "zai-org/glm-4-9b-hf",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "zai-org/glm-4-9b-hf",
@ -233,7 +287,8 @@
"test_name": "serving_gemma7B_tp1_random_128_128",
"server_parameters": {
"model": "google/gemma-7b",
"tensor_parallel_size": 1
"tensor_parallel_size": 1,
"enforce_eager": ""
},
"client_parameters": {
"model": "google/gemma-7b",