mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 10:04:59 +08:00
[Bugfix] Miscalculated latency leads to inaccurate time_to_first_token_seconds. (#6686)
This commit is contained in:
parent
2cf0df3381
commit
40468b13fa
@ -949,8 +949,9 @@ class LLMEngine:
|
|||||||
model_output: Optional[List[SamplerOutput]] = None) -> None:
|
model_output: Optional[List[SamplerOutput]] = None) -> None:
|
||||||
"""Forced log when no requests active."""
|
"""Forced log when no requests active."""
|
||||||
if self.log_stats:
|
if self.log_stats:
|
||||||
|
stats = self._get_stats(scheduler_outputs, model_output)
|
||||||
for logger in self.stat_loggers.values():
|
for logger in self.stat_loggers.values():
|
||||||
logger.log(self._get_stats(scheduler_outputs, model_output))
|
logger.log(stats)
|
||||||
|
|
||||||
def _get_stats(
|
def _get_stats(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -484,7 +484,7 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
|
|||||||
for both speculation cases (num_lookahead_slots>0) and non-speculation
|
for both speculation cases (num_lookahead_slots>0) and non-speculation
|
||||||
cases (e.g. prefill).
|
cases (e.g. prefill).
|
||||||
|
|
||||||
Returns True iff there are remaining sequences to process.
|
Returns True if there are remaining sequences to process.
|
||||||
"""
|
"""
|
||||||
assert self.rank != self._driver_rank
|
assert self.rank != self._driver_rank
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user