[Frontend] correctly record prefill and decode time metrics (#10853)

Signed-off-by: Tomer Asida <tomera@ai21.com>
This commit is contained in:
tomeras91 2024-12-03 21:13:31 +02:00 committed by GitHub
parent 7090c27bb2
commit 7c32b6861e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -599,9 +599,9 @@ class PrometheusStatLogger(StatLoggerBase):
stats.time_queue_requests)
self._log_histogram(self.metrics.histogram_inference_time_request,
stats.time_inference_requests)
-self._log_histogram(self.metrics.histogram_decode_time_request,
-stats.time_prefill_requests)
 self._log_histogram(self.metrics.histogram_prefill_time_request,
+stats.time_prefill_requests)
+self._log_histogram(self.metrics.histogram_decode_time_request,
 stats.time_decode_requests)
self._log_histogram(self.metrics.histogram_time_in_queue_request,
stats.time_in_queue_requests)