mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-08 20:07:09 +08:00
[ci] set timeout for test_oot_registration.py (#7082)
This commit is contained in:
parent
c16eaac500
commit
806949514a
@@ -36,10 +36,12 @@ def test_oot_registration_for_api_server():
|
||||
ctx = torch.multiprocessing.get_context()
|
||||
server = ctx.Process(target=server_function, args=(port, ))
|
||||
server.start()
|
||||
MAX_SERVER_START_WAIT_S = 60
|
||||
client = OpenAI(
|
||||
base_url=f"http://localhost:{port}/v1",
|
||||
api_key="token-abc123",
|
||||
)
|
||||
now = time.time()
|
||||
while True:
|
||||
try:
|
||||
completion = client.chat.completions.create(
|
||||
@@ -57,6 +59,8 @@ def test_oot_registration_for_api_server():
|
||||
except OpenAIError as e:
|
||||
if "Connection error" in str(e):
|
||||
time.sleep(3)
|
||||
if time.time() - now > MAX_SERVER_START_WAIT_S:
|
||||
raise RuntimeError("Server did not start in time") from e
|
||||
else:
|
||||
raise e
|
||||
server.kill()
|
||||
|
||||
@@ -186,7 +186,9 @@ class Worker(LocalOrDistributedWorkerBase):
|
||||
# GPU did not change their memory usage during the profiling.
|
||||
peak_memory = self.init_gpu_memory - free_gpu_memory
|
||||
assert peak_memory > 0, (
|
||||
"Error in memory profiling. This happens when the GPU memory was "
|
||||
"Error in memory profiling. "
|
||||
f"Initial free memory {self.init_gpu_memory}, current free memory"
|
||||
f" {free_gpu_memory}. This happens when the GPU memory was "
|
||||
"not properly cleaned up before initializing the vLLM instance.")
|
||||
|
||||
cache_block_size = self.get_cache_block_size_bytes()
|
||||
|
||||
@@ -138,7 +138,9 @@ class XPUWorker(LoraNotSupportedWorkerBase, Worker):
|
||||
# GPU did not change their memory usage during the profiling.
|
||||
peak_memory = self.init_gpu_memory - free_gpu_memory
|
||||
assert peak_memory > 0, (
|
||||
"Error in memory profiling. This happens when the GPU memory was "
|
||||
"Error in memory profiling. "
|
||||
f"Initial free memory {self.init_gpu_memory}, current free memory"
|
||||
f" {free_gpu_memory}. This happens when the GPU memory was "
|
||||
"not properly cleaned up before initializing the vLLM instance.")
|
||||
|
||||
cache_block_size = self.get_cache_block_size_bytes()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user