diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 560f84a008291..8fc69d96d321e 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -677,12 +677,10 @@ class LLMEngine:
         self.model_executor.stop_remote_worker_execution_loop()
 
     @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
     def add_request(
         self,
         request_id: str,
-        *,
-        inputs: PromptType,
+        prompt: PromptType,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
@@ -693,10 +691,12 @@ class LLMEngine:
         ...
 
     @overload
+    @deprecated("'inputs' will be renamed to 'prompt")
     def add_request(
         self,
         request_id: str,
-        prompt: PromptType,
+        *,
+        inputs: PromptType,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py
index 7020012e8bb86..420f540d0b5f4 100644
--- a/vllm/engine/multiprocessing/__init__.py
+++ b/vllm/engine/multiprocessing/__init__.py
@@ -35,11 +35,9 @@ class RPCProcessRequest:
     priority: int = 0
 
     @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
     def __init__(
         self,
-        *,
-        inputs: PromptType,
+        prompt: PromptType,
         params: Union[SamplingParams, PoolingParams],
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -50,9 +48,11 @@ class RPCProcessRequest:
         ...
 
     @overload
+    @deprecated("'inputs' will be renamed to 'prompt")
     def __init__(
         self,
-        prompt: PromptType,
+        *,
+        inputs: PromptType,
         params: Union[SamplingParams, PoolingParams],
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
index 7e4f81b2cf8e2..32bd83305bb8f 100644
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -415,11 +415,9 @@ class MQLLMEngineClient(EngineClient):
         return ENGINE_DEAD_ERROR(self._errored_with)
 
     @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
     def generate(
         self,
-        *,
-        inputs: PromptType,
+        prompt: PromptType,
         sampling_params: SamplingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -430,9 +428,11 @@ class MQLLMEngineClient(EngineClient):
         ...
 
     @overload
+    @deprecated("'inputs' will be renamed to 'prompt")
     def generate(
         self,
-        prompt: PromptType,
+        *,
+        inputs: PromptType,
         sampling_params: SamplingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -487,11 +487,9 @@ class MQLLMEngineClient(EngineClient):
             prompt_adapter_request, priority)
 
     @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
     def encode(
         self,
-        *,
-        inputs: PromptType,
+        prompt: PromptType,
         pooling_params: PoolingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -501,9 +499,11 @@ class MQLLMEngineClient(EngineClient):
         ...
 
     @overload
+    @deprecated("'inputs' will be renamed to 'prompt")
     def encode(
         self,
-        prompt: PromptType,
+        *,
+        inputs: PromptType,
         pooling_params: PoolingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 8de30ccd18a11..2a02187223a33 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -252,8 +252,21 @@ class LLM:
         else:
             tokenizer_group.tokenizer = get_cached_tokenizer(tokenizer)
 
+    @overload
+    def generate(
+        self,
+        prompts: Union[PromptType, Sequence[PromptType]],
+        /,
+        *,
+        sampling_params: Optional[Union[SamplingParams,
+                                        Sequence[SamplingParams]]] = None,
+        use_tqdm: bool = True,
+        lora_request: Optional[Union[List[LoRARequest], LoRARequest]] = None,
+    ) -> List[RequestOutput]:
+        ...
+
     @overload  # LEGACY: single (prompt + optional token ids)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def generate(
         self,
         prompts: str,
@@ -266,7 +279,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: multi (prompt + optional token ids)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def generate(
         self,
         prompts: List[str],
@@ -279,7 +292,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: single (token ids + optional prompt)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def generate(
         self,
         prompts: Optional[str] = None,
@@ -293,7 +306,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: multi (token ids + optional prompt)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def generate(
         self,
         prompts: Optional[List[str]] = None,
@@ -307,7 +320,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: single or multi token ids [pos-only]
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def generate(
         self,
         prompts: None,
@@ -318,19 +331,6 @@ class LLM:
     ) -> List[RequestOutput]:
         ...
 
-    @overload
-    def generate(
-        self,
-        prompts: Union[PromptType, Sequence[PromptType]],
-        /,
-        *,
-        sampling_params: Optional[Union[SamplingParams,
-                                        Sequence[SamplingParams]]] = None,
-        use_tqdm: bool = True,
-        lora_request: Optional[Union[List[LoRARequest], LoRARequest]] = None,
-    ) -> List[RequestOutput]:
-        ...
-
     @deprecate_kwargs(
         "prompt_token_ids",
         is_deprecated=lambda: LLM.DEPRECATE_LEGACY,
@@ -672,8 +672,21 @@ class LLM:
             lora_request=lora_request,
         )
 
+    @overload
+    def encode(
+        self,
+        prompts: Union[PromptType, Sequence[PromptType]],
+        /,
+        *,
+        pooling_params: Optional[Union[PoolingParams,
+                                       Sequence[PoolingParams]]] = None,
+        use_tqdm: bool = True,
+        lora_request: Optional[Union[List[LoRARequest], LoRARequest]] = None,
+    ) -> List[PoolingRequestOutput]:
+        ...
+
     @overload  # LEGACY: single (prompt + optional token ids)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def encode(
         self,
         prompts: str,
@@ -686,7 +699,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: multi (prompt + optional token ids)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def encode(
         self,
         prompts: List[str],
@@ -699,7 +712,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: single (token ids + optional prompt)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def encode(
         self,
         prompts: Optional[str] = None,
@@ -713,7 +726,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: multi (token ids + optional prompt)
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def encode(
         self,
         prompts: Optional[List[str]] = None,
@@ -727,7 +740,7 @@ class LLM:
         ...
 
     @overload  # LEGACY: single or multi token ids [pos-only]
-    @deprecated("'prompt_token_ids' will become part of 'prompts")
+    @deprecated("'prompt_token_ids' will become part of 'prompts'")
     def encode(
         self,
         prompts: None,
@@ -738,19 +751,6 @@ class LLM:
     ) -> List[PoolingRequestOutput]:
         ...
 
-    @overload
-    def encode(
-        self,
-        prompts: Union[PromptType, Sequence[PromptType]],
-        /,
-        *,
-        pooling_params: Optional[Union[PoolingParams,
-                                       Sequence[PoolingParams]]] = None,
-        use_tqdm: bool = True,
-        lora_request: Optional[Union[List[LoRARequest], LoRARequest]] = None,
-    ) -> List[PoolingRequestOutput]:
-        ...
-
    @deprecate_kwargs(
        "prompt_token_ids",
        is_deprecated=lambda: LLM.DEPRECATE_LEGACY,
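For reference, a minimal usage sketch (not part of the diff) of the two calling conventions these overloads distinguish. Assumptions: vLLM is installed locally, and the model name, prompt string, and token ids below are placeholders. The non-deprecated overload takes prompts positionally; passing the legacy `prompt_token_ids` keyword still type-checks but resolves to an overload marked `@deprecated` and is gated at runtime by `deprecate_kwargs` / `LLM.DEPRECATE_LEGACY`.

```python
# Sketch only: illustrates the overload resolution, not the PR's implementation.
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")  # placeholder model
params = SamplingParams(temperature=0.0, max_tokens=16)

# Non-deprecated path: prompts passed positionally, matching the overload this
# diff moves to the top of LLM.generate (LLM.encode mirrors it with PoolingParams).
outputs = llm.generate(["Hello, my name is"], sampling_params=params)
print(outputs[0].outputs[0].text)

# Legacy path: the 'prompt_token_ids' keyword matches one of the overloads
# now decorated with @deprecated; token ids here are placeholders.
legacy = llm.generate(prompts=None,
                      sampling_params=params,
                      prompt_token_ids=[[1, 2, 3]])
print(legacy[0].outputs[0].text)
```

Listing the non-deprecated overload first keeps type checkers and editor signature hints surfacing the preferred form, which is presumably why this change moves it ahead of the legacy variants in `LLM.generate` and `LLM.encode` and after them in the engine/client classes, where the keyword-only `inputs` form is the one being deprecated.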