[Misc] Change buckets of histogram_iteration_tokens to [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192] to represent number of tokens (#17033)

Signed-off-by: sfc-gh-zhwang <flex.wang@snowflake.com>
This commit is contained in:
Flex Wang 2025-04-27 05:30:53 -07:00 committed by GitHub
parent 30215ca61f
commit 18445edd0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 17 deletions

View File

@ -140,16 +140,13 @@ class Metrics:
name="vllm:generation_tokens_total",
documentation="Number of generation tokens processed.",
labelnames=labelnames)
buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
if not vllm_config.model_config.enforce_eager:
buckets = vllm_config.compilation_config.\
cudagraph_capture_sizes.copy()
buckets.sort()
self.histogram_iteration_tokens = self._histogram_cls(
name="vllm:iteration_tokens_total",
documentation="Histogram of number of tokens per engine_step.",
labelnames=labelnames,
buckets=buckets)
buckets=[
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192
])
self.histogram_time_to_first_token = self._histogram_cls(
name="vllm:time_to_first_token_seconds",
documentation="Histogram of time to first token in seconds.",

View File

@ -232,7 +232,10 @@ class PrometheusStatLogger(StatLoggerBase):
prometheus_client.Histogram(
name="vllm:iteration_tokens_total",
documentation="Histogram of number of tokens per engine_step.",
buckets=build_cudagraph_buckets(vllm_config),
buckets=[
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096,
16192
],
labelnames=labelnames).labels(*labelvalues)
self.histogram_max_num_generation_tokens_request = \
@ -467,16 +470,6 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
return build_buckets([1, 2, 5], max_value)
# Choose the bucket boundaries passed to the "vllm:iteration_tokens_total"
# histogram. When enforce_eager is false the boundaries are the values of
# compilation_config.cudagraph_capture_sizes in ascending order; otherwise
# a fixed list of token counts is returned.
def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]:
if not vllm_config.model_config.enforce_eager:
# Work on a copy so the in-place sort below does not reorder the list
# stored on the config object.
buckets = vllm_config.compilation_config.\
cudagraph_capture_sizes.copy()
buckets.sort()
return buckets
else:
# NOTE(review): 8096 breaks the power-of-two progression of its
# neighbours (4096 -> 8192 would be expected) — confirm it is intended.
return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
def setup_default_loggers(
vllm_config: VllmConfig,
log_stats: bool,