mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 13:35:36 +08:00
[Bugfix] Fix allocation & free logic of SingleWriterShmRingBuffer (#27117)
Signed-off-by: Kero Liang <kerorek@outlook.com> Signed-off-by: Roger Wang <hey@rogerw.io> Co-authored-by: donglu <donglu@cohere.com> Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
e88bdd60d9
commit
02af36df36
@ -4,6 +4,8 @@
|
|||||||
import traceback
|
import traceback
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
from vllm.distributed.device_communicators.shm_object_storage import (
|
from vllm.distributed.device_communicators.shm_object_storage import (
|
||||||
SingleWriterShmRingBuffer,
|
SingleWriterShmRingBuffer,
|
||||||
)
|
)
|
||||||
@ -113,6 +115,69 @@ class TestSingleWriterShmRingBuffer(unittest.TestCase):
|
|||||||
self.assertEqual(self.ring_buffer.data_buffer_start, 0)
|
self.assertEqual(self.ring_buffer.data_buffer_start, 0)
|
||||||
self.assertEqual(self.ring_buffer.data_buffer_end, 0)
|
self.assertEqual(self.ring_buffer.data_buffer_end, 0)
|
||||||
|
|
||||||
|
def test_allocation_cycles(self):
    """Exercise repeated allocate/free cycles on a small ring buffer.

    A 100-byte ring is created and filled with many small allocations;
    whenever the ring raises ``MemoryError`` the test frees space and
    retries once.  Every allocation and release is mirrored in a local
    byte-level bitmap so the test can assert that

    * a newly returned region never overlaps a region that is still live,
    * every id reported as freed was previously handed out, and its
      region was fully marked as live.

    Finally everything is released and a single allocation sized
    ``buffer_size - ring.MD_SIZE`` is requested.
    """
    buffer_size = 100
    ring = SingleWriterShmRingBuffer(data_buffer_size=buffer_size, create=True)
    # NOTE(review): this ring is not explicitly released inside the test;
    # confirm that cleanup of the shared memory happens elsewhere.

    # Shadow bookkeeping used only for the assertions below.
    in_use = np.zeros((buffer_size,), dtype=np.bool_)  # addr -> is_allocated
    live_regions = {}  # monotonic_id -> (addr, size)

    def used_bytes(region) -> int:
        # Number of bytes flagged as allocated inside the given bitmap view.
        return int(np.count_nonzero(region))

    def always_free(a, b) -> bool:
        # Predicate handed to ring.free_buf: every candidate is releasable.
        return True

    def record_allocation(monotonic_id, addr, size):
        # Addresses are folded into [0, buffer_size) before indexing the
        # bitmap, as in the original bookkeeping.
        begin = addr % buffer_size
        end = begin + size
        # The region must not overlap anything still marked as live.
        self.assertEqual(used_bytes(in_use[begin:end]), 0)

        in_use[begin:end] = True
        live_regions[monotonic_id] = (begin, size)

    def record_release(monotonic_id):
        # Only ids that were previously handed out may be reported as freed.
        self.assertIn(monotonic_id, live_regions)

        addr, size = live_regions.pop(monotonic_id)
        begin = addr % buffer_size
        end = begin + size
        # The whole region must still be marked as live before clearing it.
        self.assertEqual(used_bytes(in_use[begin:end]), size)

        in_use[begin:end] = False

    def ring_free(free_size=None):
        # Release from the ring and replay each freed id on the bitmap.
        for released_id in ring.free_buf(always_free, free_size):
            record_release(released_id)

    def ring_allocate(payload_size):
        # Each allocation occupies the payload plus ring.MD_SIZE bytes.
        footprint = payload_size + ring.MD_SIZE
        try:
            addr, monotonic_id = ring.allocate_buf(payload_size)
        except MemoryError:
            # free 2x size for enough space if wrapping happened
            ring_free(footprint * 2)

            # retry allocating
            addr, monotonic_id = ring.allocate_buf(payload_size)
        record_allocation(monotonic_id, addr, footprint)

    # 1. allocation & free cycles
    for _ in range(33):
        # will consume 2 + 8 = 10 bytes per allocation
        # (payload + ring.MD_SIZE)
        ring_allocate(2)

    # 2. free all allocations
    ring_free()

    # 3. try allocate the largest possible buffer
    ring_allocate(buffer_size - ring.MD_SIZE)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main function demonstrating usage and running tests"""
|
"""Main function demonstrating usage and running tests"""
|
||||||
|
|||||||
@ -127,9 +127,7 @@ class SingleWriterShmRingBuffer:
|
|||||||
|
|
||||||
if create:
|
if create:
|
||||||
# we are creating a buffer
|
# we are creating a buffer
|
||||||
self.metadata = {
|
self.metadata: dict[int, int] = {} # monotonic_id -> start address
|
||||||
self.monotonic_id_end: self.data_buffer_end
|
|
||||||
} # monotonic_id -> start address
|
|
||||||
self.shared_memory = shared_memory.SharedMemory(
|
self.shared_memory = shared_memory.SharedMemory(
|
||||||
create=True, size=self.data_buffer_size, name=name
|
create=True, size=self.data_buffer_size, name=name
|
||||||
)
|
)
|
||||||
@ -288,7 +286,15 @@ class SingleWriterShmRingBuffer:
|
|||||||
self.monotonic_id_start = (
|
self.monotonic_id_start = (
|
||||||
self.monotonic_id_start + 1
|
self.monotonic_id_start + 1
|
||||||
) % self.ID_MAX
|
) % self.ID_MAX
|
||||||
self.data_buffer_start = address
|
if self.monotonic_id_start in self.metadata:
|
||||||
|
# pointing to the start addr of next allocation
|
||||||
|
self.data_buffer_start += (
|
||||||
|
self.metadata[self.monotonic_id_start]
|
||||||
|
- self.data_buffer_start
|
||||||
|
) % self.data_buffer_size
|
||||||
|
else:
|
||||||
|
# no remaining allocation, reset to zero
|
||||||
|
self.data_buffer_start = self.data_buffer_end = 0
|
||||||
freed_bytes += metadata[1]
|
freed_bytes += metadata[1]
|
||||||
else:
|
else:
|
||||||
# there are still readers, we cannot free the buffer
|
# there are still readers, we cannot free the buffer
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user