diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 0f79b7e79d384..033551d07c39f 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -145,7 +145,7 @@ class Metrics: documentation="Histogram of number of tokens per engine_step.", labelnames=labelnames, buckets=[ - 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192 + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384 ]) self.histogram_time_to_first_token = self._histogram_cls( name="vllm:time_to_first_token_seconds", diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index e2e0b305e81fa..7051c681b1a01 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -233,8 +233,8 @@ class PrometheusStatLogger(StatLoggerBase): name="vllm:iteration_tokens_total", documentation="Histogram of number of tokens per engine_step.", buckets=[ - 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, - 16192 + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, + 16384 ], labelnames=labelnames).labels(*labelvalues)