diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6eaf328eb1655..888f57b1ac1df 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -367,7 +367,7 @@ class EngineArgs: config_format: str = ModelConfig.config_format dtype: ModelDType = ModelConfig.dtype kv_cache_dtype: CacheDType = CacheConfig.cache_dtype - seed: int | None = None + seed: int | None = 0 max_model_len: int | None = ModelConfig.max_model_len cuda_graph_sizes: list[int] | None = CompilationConfig.cudagraph_capture_sizes cudagraph_capture_sizes: list[int] | None = ( @@ -1192,6 +1192,12 @@ class EngineArgs: # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here # doesn't affect the user process. if self.seed is None: + logger.warning_once( + "`seed=None` is equivalent to `seed=0` in V1 Engine. " + "You will no longer be allowed to pass `None` in v0.13.", + scope="local", + ) + self.seed = 0 if not envs.VLLM_ENABLE_V1_MULTIPROCESSING: logger.warning( @@ -1203,28 +1209,31 @@ class EngineArgs: ) if self.disable_mm_preprocessor_cache: - logger.warning( + logger.warning_once( "`--disable-mm-preprocessor-cache` is deprecated " "and will be removed in v0.13. " "Please use `--mm-processor-cache-gb 0` instead.", + scope="local", ) self.mm_processor_cache_gb = 0 elif envs.VLLM_MM_INPUT_CACHE_GIB != 4: - logger.warning( + logger.warning_once( "VLLM_MM_INPUT_CACHE_GIB` is deprecated " "and will be removed in v0.13. " "Please use `--mm-processor-cache-gb %d` instead.", envs.VLLM_MM_INPUT_CACHE_GIB, + scope="local", ) self.mm_processor_cache_gb = envs.VLLM_MM_INPUT_CACHE_GIB if self.enable_multimodal_encoder_data_parallel: - logger.warning( + logger.warning_once( "--enable-multimodal-encoder-data-parallel` is deprecated " "and will be removed in v0.13. " - "Please use `--mm-encoder-tp-mode data` instead." + "Please use `--mm-encoder-tp-mode data` instead.", + scope="local", ) self.mm_encoder_tp_mode = "data"