mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:45:01 +08:00
[ci] fix linter (#13701)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
3e472d882a
commit
2382ad29d1
@ -48,15 +48,16 @@ def main(dp_size, dp_rank, dp_master_ip, dp_master_port, GPUs_per_dp_rank):
|
||||
max_tokens=16 * (dp_rank + 1))
|
||||
|
||||
# Create an LLM.
|
||||
llm = LLM(model="facebook/opt-125m", tensor_parallel_size=2, enforce_eager=True)
|
||||
llm = LLM(model="facebook/opt-125m",
|
||||
tensor_parallel_size=2,
|
||||
enforce_eager=True)
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
# Print the outputs.
|
||||
for output in outputs:
|
||||
prompt = output.prompt
|
||||
generated_text = output.outputs[0].text
|
||||
print(
|
||||
f"DP rank {dp_rank}, Prompt: {prompt!r}, "
|
||||
f"Generated text: {generated_text!r}")
|
||||
print(f"DP rank {dp_rank}, Prompt: {prompt!r}, "
|
||||
f"Generated text: {generated_text!r}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -1372,7 +1372,7 @@ class ParallelConfig:
|
||||
|
||||
@staticmethod
|
||||
def has_unfinished_dp(dp_group: "ProcessGroup",
|
||||
has_unfinished: bool) -> bool:
|
||||
has_unfinished: bool) -> bool:
|
||||
tensor = torch.tensor([has_unfinished],
|
||||
dtype=torch.int32,
|
||||
device="cpu")
|
||||
|
||||
@ -518,6 +518,7 @@ def get_open_port() -> int:
|
||||
return port
|
||||
return _get_open_port()
|
||||
|
||||
|
||||
def _get_open_port() -> int:
|
||||
port = envs.VLLM_PORT
|
||||
if port is not None:
|
||||
|
||||
@ -89,7 +89,7 @@ class EngineCoreClient(ABC):
|
||||
|
||||
def execute_dummy_batch(self) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
async def execute_dummy_batch_async(self) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
@ -343,6 +343,7 @@ class SyncMPClient(MPClient):
|
||||
def execute_dummy_batch(self) -> None:
|
||||
self._call_utility("execute_dummy_batch")
|
||||
|
||||
|
||||
class AsyncMPClient(MPClient):
|
||||
"""Asyncio-compatible client for multi-proc EngineCore."""
|
||||
|
||||
|
||||
@ -1167,7 +1167,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
||||
for k, v in self.intermediate_tensors.items()
|
||||
})
|
||||
|
||||
with set_forward_context(None, self.vllm_config, num_tokens=num_tokens):
|
||||
with set_forward_context(None, self.vllm_config,
|
||||
num_tokens=num_tokens):
|
||||
hidden_states = model(
|
||||
input_ids=input_ids,
|
||||
positions=positions,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user