mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-18 19:04:31 +08:00
[Misc] Change buckets of histogram_iteration_tokens to [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096] to represent number of tokens (#17033)
Signed-off-by: sfc-gh-zhwang <flex.wang@snowflake.com>
This commit is contained in:
parent
30215ca61f
commit
18445edd0f
@@ -140,16 +140,13 @@ class Metrics:
|
||||
name="vllm:generation_tokens_total",
|
||||
documentation="Number of generation tokens processed.",
|
||||
labelnames=labelnames)
|
||||
buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
|
||||
if not vllm_config.model_config.enforce_eager:
|
||||
buckets = vllm_config.compilation_config.\
|
||||
cudagraph_capture_sizes.copy()
|
||||
buckets.sort()
|
||||
self.histogram_iteration_tokens = self._histogram_cls(
|
||||
name="vllm:iteration_tokens_total",
|
||||
documentation="Histogram of number of tokens per engine_step.",
|
||||
labelnames=labelnames,
|
||||
buckets=buckets)
|
||||
buckets=[
|
||||
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192
|
||||
])
|
||||
self.histogram_time_to_first_token = self._histogram_cls(
|
||||
name="vllm:time_to_first_token_seconds",
|
||||
documentation="Histogram of time to first token in seconds.",
|
||||
|
||||
@@ -232,7 +232,10 @@ class PrometheusStatLogger(StatLoggerBase):
|
||||
prometheus_client.Histogram(
|
||||
name="vllm:iteration_tokens_total",
|
||||
documentation="Histogram of number of tokens per engine_step.",
|
||||
buckets=build_cudagraph_buckets(vllm_config),
|
||||
buckets=[
|
||||
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096,
|
||||
16192
|
||||
],
|
||||
labelnames=labelnames).labels(*labelvalues)
|
||||
|
||||
self.histogram_max_num_generation_tokens_request = \
|
||||
@@ -467,16 +470,6 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
|
||||
return build_buckets([1, 2, 5], max_value)
|
||||
|
||||
|
||||
def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]:
|
||||
if not vllm_config.model_config.enforce_eager:
|
||||
buckets = vllm_config.compilation_config.\
|
||||
cudagraph_capture_sizes.copy()
|
||||
buckets.sort()
|
||||
return buckets
|
||||
else:
|
||||
return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
|
||||
|
||||
|
||||
def setup_default_loggers(
|
||||
vllm_config: VllmConfig,
|
||||
log_stats: bool,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user