From 03dccc886ef7e5d0dd67512f3e9748ee00c21fb2 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 14 Jun 2024 02:21:39 +0800
Subject: [PATCH] [Misc] Add vLLM version getter to utils (#5098)

---
 setup.py                              | 2 +-
 vllm/__init__.py                      | 3 ++-
 vllm/engine/llm_engine.py             | 4 ++--
 vllm/entrypoints/openai/api_server.py | 6 +++---
 vllm/entrypoints/openai/run_batch.py  | 4 ++--
 vllm/usage/usage_lib.py               | 4 ++--
 vllm/version.py                       | 1 +
 7 files changed, 13 insertions(+), 11 deletions(-)
 create mode 100644 vllm/version.py

diff --git a/setup.py b/setup.py
index 3a41b1a0b31b..12a704e08eed 100644
--- a/setup.py
+++ b/setup.py
@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
 
 
 def get_vllm_version() -> str:
-    version = find_version(get_path("vllm", "__init__.py"))
+    version = find_version(get_path("vllm", "version.py"))
 
     if _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())
diff --git a/vllm/__init__.py b/vllm/__init__.py
index 10cc66941a7f..e217059873bf 100644
--- a/vllm/__init__.py
+++ b/vllm/__init__.py
@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 
-__version__ = "0.5.0"
+from .version import __version__
 
 __all__ = [
+    "__version__",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index ea754758492f..b2f6478cbfd7 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
 
 from transformers import GenerationConfig, PreTrainedTokenizer
 
-import vllm
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
                          LoRAConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, SpeculativeConfig,
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
 from vllm.utils import Counter
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
 _LOCAL_LOGGING_INTERVAL_SEC = 5
@@ -169,7 +169,7 @@ class LLMEngine:
             "enforce_eager=%s, kv_cache_dtype=%s, "
             "quantization_param_path=%s, device_config=%s, "
             "decoding_config=%r, seed=%d, served_model_name=%s)",
-            vllm.__version__,
+            VLLM_VERSION,
             model_config.model,
             speculative_config,
             model_config.tokenizer,
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index e7503b965583..ea6275920c79 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
 from prometheus_client import make_asgi_app
 from starlette.routing import Mount
 
-import vllm
 import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
+from vllm.version import __version__ as VLLM_VERSION
 
 TIMEOUT_KEEP_ALIVE = 5  # seconds
 
@@ -93,7 +93,7 @@ async def show_available_models():
 
 @app.get("/version")
 async def show_version():
-    ver = {"version": vllm.__version__}
+    ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)
 
 
@@ -174,7 +174,7 @@ if __name__ == "__main__":
             raise ValueError(f"Invalid middleware {middleware}. "
                              f"Must be a function or a class.")
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     if args.served_model_name is not None:
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 731f4f4a4028..7a6819c35a92 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -5,7 +5,6 @@ from io import StringIO
 
 import aiohttp
 
-import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import random_uuid
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
 
@@ -135,7 +135,7 @@ async def main(args):
 if __name__ == "__main__":
     args = parse_args()
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     asyncio.run(main(args))
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index 40a954a29493..afb3007a528b 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -16,6 +16,7 @@ import requests
 import torch
 
 import vllm.envs as envs
+from vllm.version import __version__ as VLLM_VERSION
 
 _config_home = envs.VLLM_CONFIG_ROOT
 _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
@@ -163,9 +164,8 @@ class UsageMessage:
         ])
 
         # vLLM information
-        import vllm  # delayed import to prevent circular import
         self.context = usage_context.value
-        self.vllm_version = vllm.__version__
+        self.vllm_version = VLLM_VERSION
         self.model_architecture = model_architecture
 
         # Metadata
diff --git a/vllm/version.py b/vllm/version.py
new file mode 100644
index 000000000000..3d187266f146
--- /dev/null
+++ b/vllm/version.py
@@ -0,0 +1 @@
+__version__ = "0.5.0"
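---
Usage note (a minimal sketch, not part of the patch itself): after this change the version string lives only in vllm/version.py, and internal modules import it under the VLLM_VERSION alias instead of importing the whole vllm package just to read __version__ (which previously risked circular imports, as in usage_lib.py). The package root still re-exports __version__, so existing external code keeps working.

    # Sketch: preferred import style inside the codebase after this patch
    from vllm.version import __version__ as VLLM_VERSION

    print(f"running against vLLM {VLLM_VERSION}")

    # Backwards-compatible path for external users, re-exported by vllm/__init__.py
    from vllm import __version__
    assert __version__ == VLLM_VERSION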