mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-12 07:40:13 +08:00
[V0][Metrics] Deprecate some questionable request time metrics (#14135)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
5db6b2c961
commit
c8525f06fc
@@ -197,24 +197,35 @@ class Metrics:
|
|||||||
"Histogram of time spent in DECODE phase for request.",
|
"Histogram of time spent in DECODE phase for request.",
|
||||||
labelnames=labelnames,
|
labelnames=labelnames,
|
||||||
buckets=request_latency_buckets)
|
buckets=request_latency_buckets)
|
||||||
|
# Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
|
||||||
|
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||||
self.histogram_time_in_queue_request = self._histogram_cls(
|
self.histogram_time_in_queue_request = self._histogram_cls(
|
||||||
name="vllm:time_in_queue_requests",
|
name="vllm:time_in_queue_requests",
|
||||||
documentation=
|
documentation=(
|
||||||
"Histogram of time the request spent in the queue in seconds.",
|
"Histogram of time the request spent in the queue in seconds. "
|
||||||
|
"DEPRECATED: use vllm:request_queue_time_seconds instead."),
|
||||||
labelnames=labelnames,
|
labelnames=labelnames,
|
||||||
buckets=request_latency_buckets)
|
buckets=request_latency_buckets)
|
||||||
|
|
||||||
|
# Deprecated in 0.8 - use prefill/decode/inference time metrics
|
||||||
|
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||||
self.histogram_model_forward_time_request = self._histogram_cls(
|
self.histogram_model_forward_time_request = self._histogram_cls(
|
||||||
name="vllm:model_forward_time_milliseconds",
|
name="vllm:model_forward_time_milliseconds",
|
||||||
documentation=
|
documentation=(
|
||||||
"Histogram of time spent in the model forward pass in ms.",
|
"Histogram of time spent in the model forward pass in ms. "
|
||||||
|
"DEPRECATED: use prefill/decode/inference time metrics instead."
|
||||||
|
),
|
||||||
labelnames=labelnames,
|
labelnames=labelnames,
|
||||||
buckets=build_1_2_3_5_8_buckets(3000))
|
buckets=build_1_2_3_5_8_buckets(3000))
|
||||||
self.histogram_model_execute_time_request = self._histogram_cls(
|
self.histogram_model_execute_time_request = self._histogram_cls(
|
||||||
name="vllm:model_execute_time_milliseconds",
|
name="vllm:model_execute_time_milliseconds",
|
||||||
documentation=
|
documentation=(
|
||||||
"Histogram of time spent in the model execute function in ms.",
|
"Histogram of time spent in the model execute function in ms. "
|
||||||
|
"DEPRECATED: use prefill/decode/inference time metrics instead."
|
||||||
|
),
|
||||||
labelnames=labelnames,
|
labelnames=labelnames,
|
||||||
buckets=build_1_2_3_5_8_buckets(3000))
|
buckets=build_1_2_3_5_8_buckets(3000))
|
||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
self.histogram_num_prompt_tokens_request = self._histogram_cls(
|
self.histogram_num_prompt_tokens_request = self._histogram_cls(
|
||||||
name="vllm:request_prompt_tokens",
|
name="vllm:request_prompt_tokens",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user