mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 22:15:01 +08:00
[Benchmark] Show E2EL by default for pooling models (#27014)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
dcbb3f1871
commit
334535b6fb
@ -58,7 +58,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a
|
|||||||
|
|
||||||
class TaskType(Enum):
|
class TaskType(Enum):
|
||||||
GENERATION = "generation"
|
GENERATION = "generation"
|
||||||
EMBEDDING = "embedding"
|
POOLING = "pooling"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -1084,10 +1084,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--percentile-metrics",
|
"--percentile-metrics",
|
||||||
type=str,
|
type=str,
|
||||||
default="ttft,tpot,itl",
|
default=None,
|
||||||
help="Comma-separated list of selected metrics to report percentils. "
|
help="Comma-separated list of selected metrics to report percentils. "
|
||||||
"This argument specifies the metrics to report percentiles. "
|
"This argument specifies the metrics to report percentiles. "
|
||||||
'Allowed metric names are "ttft", "tpot", "itl", "e2el". ',
|
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
|
||||||
|
'If not specified, defaults to "ttft,tpot,itl" for generative models '
|
||||||
|
'and "e2el" for pooling models.',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--metric-percentiles",
|
"--metric-percentiles",
|
||||||
@ -1310,7 +1312,11 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
goodput_config_dict = check_goodput_args(args)
|
goodput_config_dict = check_goodput_args(args)
|
||||||
|
|
||||||
backend = args.backend
|
backend = args.backend
|
||||||
task_type = TaskType.EMBEDDING if "embeddings" in backend else TaskType.GENERATION
|
task_type = (
|
||||||
|
TaskType.POOLING
|
||||||
|
if "embeddings" in backend or "rerank" in backend
|
||||||
|
else TaskType.GENERATION
|
||||||
|
)
|
||||||
|
|
||||||
# Collect the sampling parameters.
|
# Collect the sampling parameters.
|
||||||
if task_type == TaskType.GENERATION:
|
if task_type == TaskType.GENERATION:
|
||||||
@ -1336,12 +1342,17 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
|
|
||||||
if "temperature" not in sampling_params:
|
if "temperature" not in sampling_params:
|
||||||
sampling_params["temperature"] = 0.0 # Default to greedy decoding.
|
sampling_params["temperature"] = 0.0 # Default to greedy decoding.
|
||||||
|
|
||||||
|
default_percentile_metrics = "ttft,tpot,itl"
|
||||||
else:
|
else:
|
||||||
sampling_params = {}
|
sampling_params = {}
|
||||||
|
default_percentile_metrics = "e2el"
|
||||||
|
|
||||||
extra_body = args.extra_body or {}
|
extra_body = args.extra_body or {}
|
||||||
extra_body = {**sampling_params, **extra_body}
|
extra_body = {**sampling_params, **extra_body}
|
||||||
|
|
||||||
|
percentile_metrics: str = args.percentile_metrics or default_percentile_metrics
|
||||||
|
|
||||||
# Avoid GC processing "static" data - reduce pause times.
|
# Avoid GC processing "static" data - reduce pause times.
|
||||||
gc.collect()
|
gc.collect()
|
||||||
gc.freeze()
|
gc.freeze()
|
||||||
@ -1360,7 +1371,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
burstiness=args.burstiness,
|
burstiness=args.burstiness,
|
||||||
disable_tqdm=args.disable_tqdm,
|
disable_tqdm=args.disable_tqdm,
|
||||||
profile=args.profile,
|
profile=args.profile,
|
||||||
selected_percentile_metrics=args.percentile_metrics.split(","),
|
selected_percentile_metrics=percentile_metrics.split(","),
|
||||||
selected_percentiles=[float(p) for p in args.metric_percentiles.split(",")],
|
selected_percentiles=[float(p) for p in args.metric_percentiles.split(",")],
|
||||||
ignore_eos=args.ignore_eos,
|
ignore_eos=args.ignore_eos,
|
||||||
goodput_config_dict=goodput_config_dict,
|
goodput_config_dict=goodput_config_dict,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user