mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 04:54:56 +08:00
[Misc] normalize multiprocessing Queue usage (#22371)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
parent
7e3a8dc906
commit
acf8aeb79e
@ -118,8 +118,17 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
|
||||
tensor_parallel_size=tp_size,
|
||||
))
|
||||
p.start()
|
||||
p.join()
|
||||
# Call queue.get() before p.join() to prevent deadlock:
|
||||
# If p.join() is called before queue.get() and the queue is full,
|
||||
# the child process may block while writing to the queue and never
|
||||
# terminate, causing the parent to wait indefinitely on p.join().
|
||||
# See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814
|
||||
out_before = queue.get()
|
||||
p.join()
|
||||
queue.close()
|
||||
queue.join_thread()
|
||||
|
||||
queue = ctx.Queue()
|
||||
|
||||
p = ctx.Process(target=_run_generate,
|
||||
args=(output_dir, queue),
|
||||
@ -131,7 +140,14 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
|
||||
load_format="sharded_state",
|
||||
))
|
||||
p.start()
|
||||
p.join()
|
||||
# Call queue.get() before p.join() to prevent deadlock:
|
||||
# If p.join() is called before queue.get() and the queue is full,
|
||||
# the child process may block while writing to the queue and never
|
||||
# terminate, causing the parent to wait indefinitely on p.join().
|
||||
# See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814
|
||||
out_after = queue.get()
|
||||
p.join()
|
||||
queue.close()
|
||||
queue.join_thread()
|
||||
|
||||
assert out_before == out_after
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user