From 2382ad29d1769f2b46d51fcc2cedbd6e00d4f180 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Sat, 22 Feb 2025 20:28:59 +0800
Subject: [PATCH] [ci] fix linter (#13701)

Signed-off-by: youkaichao
---
 examples/offline_inference/data_parallel.py | 9 +++++----
 vllm/config.py                              | 2 +-
 vllm/utils.py                               | 1 +
 vllm/v1/engine/core_client.py               | 3 ++-
 vllm/v1/worker/gpu_model_runner.py          | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/examples/offline_inference/data_parallel.py b/examples/offline_inference/data_parallel.py
index a9544c8cf8a8..2e1fa50e2ab3 100644
--- a/examples/offline_inference/data_parallel.py
+++ b/examples/offline_inference/data_parallel.py
@@ -48,15 +48,16 @@ def main(dp_size, dp_rank, dp_master_ip, dp_master_port, GPUs_per_dp_rank):
                                      max_tokens=16 * (dp_rank + 1))
 
     # Create an LLM.
-    llm = LLM(model="facebook/opt-125m", tensor_parallel_size=2, enforce_eager=True)
+    llm = LLM(model="facebook/opt-125m",
+              tensor_parallel_size=2,
+              enforce_eager=True)
     outputs = llm.generate(prompts, sampling_params)
     # Print the outputs.
     for output in outputs:
         prompt = output.prompt
         generated_text = output.outputs[0].text
-        print(
-            f"DP rank {dp_rank}, Prompt: {prompt!r}, "
-            f"Generated text: {generated_text!r}")
+        print(f"DP rank {dp_rank}, Prompt: {prompt!r}, "
+              f"Generated text: {generated_text!r}")
 
 
 if __name__ == "__main__":
diff --git a/vllm/config.py b/vllm/config.py
index ed32a5028790..d3139b5fd84e 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1372,7 +1372,7 @@ class ParallelConfig:
 
     @staticmethod
     def has_unfinished_dp(dp_group: "ProcessGroup",
-                           has_unfinished: bool) -> bool:
+                          has_unfinished: bool) -> bool:
         tensor = torch.tensor([has_unfinished],
                               dtype=torch.int32,
                               device="cpu")
diff --git a/vllm/utils.py b/vllm/utils.py
index 7d24154927b8..675edc3620b5 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -518,6 +518,7 @@ def get_open_port() -> int:
             return port
     return _get_open_port()
 
+
 def _get_open_port() -> int:
     port = envs.VLLM_PORT
     if port is not None:
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index e898a872c62b..527aa72833ba 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -89,7 +89,7 @@ class EngineCoreClient(ABC):
 
     def execute_dummy_batch(self) -> None:
         raise NotImplementedError
-    
+
     async def execute_dummy_batch_async(self) -> None:
         raise NotImplementedError
 
@@ -343,6 +343,7 @@ class SyncMPClient(MPClient):
     def execute_dummy_batch(self) -> None:
         self._call_utility("execute_dummy_batch")
 
+
 class AsyncMPClient(MPClient):
     """Asyncio-compatible client for multi-proc EngineCore."""
 
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f002cbfccd40..a7b9d4781183 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1167,7 +1167,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                 for k, v in self.intermediate_tensors.items()
             })
 
-        with set_forward_context(None, self.vllm_config, num_tokens=num_tokens):
+        with set_forward_context(None, self.vllm_config,
+                                 num_tokens=num_tokens):
             hidden_states = model(
                 input_ids=input_ids,
                 positions=positions,