From 2e3b969ec0d46e2cfff041a07f29a2ca4bb82bbd Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Tue, 11 Feb 2025 22:06:46 +0800 Subject: [PATCH] [Platform] add pre_register_and_update function (#12432) Signed-off-by: wangxiyuan --- vllm/config.py | 3 ++- vllm/engine/arg_utils.py | 21 +++++++++++++++++++++ vllm/platforms/interface.py | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index 426ba38080270..1d8c42dd276a6 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -3057,7 +3057,8 @@ class VllmConfig: kv_transfer_config: KVTransferConfig = field(default=None, init=True) # type: ignore # some opaque config, only used to provide additional information - # for the hash computation, mainly used for testing and debugging. + # for the hash computation, mainly used for testing, debugging or out of + # tree config registration. additional_config: SupportsHash = field(default=None, init=True) # type: ignore instance_id: str = "" diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 40c6fb4567993..4232ad9204f44 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -20,6 +20,7 @@ from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat, from vllm.executor.executor_base import ExecutorBase from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS +from vllm.plugins import load_general_plugins from vllm.transformers_utils.utils import check_gguf_file from vllm.usage.usage_lib import UsageContext from vllm.utils import FlexibleArgumentParser, StoreBoolean @@ -203,6 +204,8 @@ class EngineArgs: calculate_kv_scales: Optional[bool] = None + additional_config: Optional[Dict[str, Any]] = None + def __post_init__(self): if not self.tokenizer: self.tokenizer = self.model @@ -984,6 +987,14 @@ class EngineArgs: 'be loaded from the model checkpoint if available. 
' 'Otherwise, the scales will default to 1.0.') + parser.add_argument( + "--additional-config", + type=json.loads, + default=None, + help="Additional config for specified platform in JSON format. " + "Different platforms may support different configs. Make sure the " + "configs are valid for the platform you are using. The input format" + " is like '{\"config_key\":\"config_value\"}'") return parser @classmethod @@ -1044,6 +1055,9 @@ class EngineArgs: def create_engine_config(self, usage_context: Optional[UsageContext] = None ) -> VllmConfig: + from vllm.platforms import current_platform + current_platform.pre_register_and_update() + if envs.VLLM_USE_V1: self._override_v1_engine_args(usage_context) @@ -1287,6 +1301,7 @@ class EngineArgs: prompt_adapter_config=prompt_adapter_config, compilation_config=self.compilation_config, kv_transfer_config=self.kv_transfer_config, + additional_config=self.additional_config, ) if envs.VLLM_USE_V1: @@ -1347,6 +1362,12 @@ class AsyncEngineArgs(EngineArgs): parser.add_argument('--disable-log-requests', action='store_true', help='Disable logging requests.') + # Initialize plugin to update the parser, for example, the plugin may + # add a new kind of quantization method to --quantization argument or + # a new device to --device argument. 
+ load_general_plugins() + from vllm.platforms import current_platform + current_platform.pre_register_and_update(parser) return parser diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 645d98a1bb42c..61673b08543f6 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -13,8 +13,10 @@ from vllm.logger import init_logger if TYPE_CHECKING: from vllm.config import VllmConfig + from vllm.utils import FlexibleArgumentParser else: VllmConfig = None + FlexibleArgumentParser = None logger = init_logger(__name__) @@ -223,6 +225,22 @@ class Platform: np.random.seed(seed) torch.manual_seed(seed) + @classmethod + def pre_register_and_update(cls, + parser: Optional[FlexibleArgumentParser] = None + ) -> None: + """ + Do some pre-registration or update action for the current platform. + + This function is called before global VllmConfig is initialized or cli + arguments are parsed. It's used for out-of-tree platforms to register or + update the configuration. + + For example, the out-of-tree quantization config can be imported and + registered here dynamically. + """ + pass + @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: """