From 32c9d7f7650842cc20b2e66a4125ffe126619c50 Mon Sep 17 00:00:00 2001
From: Simon Mo
Date: Sun, 14 Jul 2024 19:37:35 -0700
Subject: [PATCH] Report usage for beam search (#6404)

---
 vllm/sampling_params.py |  5 +++++
 vllm/usage/usage_lib.py | 15 +++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 90f0944a7f3de..ebe5e0fd34135 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -189,6 +189,10 @@ class SamplingParams:
 
         self._verify_args()
         if self.use_beam_search:
+            # Lazy import to avoid circular imports.
+            from vllm.usage.usage_lib import set_runtime_usage_data
+            set_runtime_usage_data("use_beam_search", True)
+
             if not envs.VLLM_NO_DEPRECATION_WARNING:
                 logger.warning(
                     "[IMPORTANT] We plan to discontinue the support for beam "
@@ -196,6 +200,7 @@ class SamplingParams:
                     "https://github.com/vllm-project/vllm/issues/6226 for "
                     "more information. Set VLLM_NO_DEPRECATION_WARNING=1 to "
                     "suppress this warning.")
+
             self._verify_beam_search()
         else:
             self._verify_non_beam_search()
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index afb3007a528b4..6907d8b9becd2 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -7,7 +7,7 @@ import time
 from enum import Enum
 from pathlib import Path
 from threading import Thread
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from uuid import uuid4
 
 import cpuinfo
@@ -25,6 +25,13 @@ _USAGE_STATS_DO_NOT_TRACK_PATH = os.path.join(_config_home,
 _USAGE_STATS_ENABLED = None
 _USAGE_STATS_SERVER = envs.VLLM_USAGE_STATS_SERVER
 
+_GLOBAL_RUNTIME_DATA: Dict[str, Union[str, int, bool]] = {}
+
+
+def set_runtime_usage_data(key: str, value: Union[str, int, bool]) -> None:
+    """Set global usage data that will be sent with every usage heartbeat."""
+    _GLOBAL_RUNTIME_DATA[key] = value
+
 
 def is_usage_stats_enabled():
     """Determine whether or not we can send usage stats to the server.
@@ -187,7 +194,11 @@ class UsageMessage:
         """
         while True:
             time.sleep(600)
-            data = {"uuid": self.uuid, "log_time": _get_current_timestamp_ns()}
+            data = {
+                "uuid": self.uuid,
+                "log_time": _get_current_timestamp_ns(),
+            }
+            data.update(_GLOBAL_RUNTIME_DATA)
             self._write_to_file(data)
             self._send_to_server(data)
 