[Bugfix] Fix allocation & free logic of SingleWriterShmRingBuffer (#27117)

Signed-off-by: Kero Liang <kerorek@outlook.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: donglu <donglu@cohere.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Kero Liang 2025-10-28 23:01:24 +08:00 committed by GitHub
parent e88bdd60d9
commit 02af36df36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 75 additions and 4 deletions

View File

@ -4,6 +4,8 @@
import traceback
import unittest
import numpy as np
from vllm.distributed.device_communicators.shm_object_storage import (
SingleWriterShmRingBuffer,
)
@ -113,6 +115,69 @@ class TestSingleWriterShmRingBuffer(unittest.TestCase):
self.assertEqual(self.ring_buffer.data_buffer_start, 0)
self.assertEqual(self.ring_buffer.data_buffer_end, 0)
def test_allocation_cycles(self):
    """Run allocate/free cycles and check that live allocations never overlap.

    A shadow bitmap (one flag per byte of the ring) and a map of
    monotonic_id -> (offset, size) mirror every allocation the ring
    reports, so each allocate/free can be validated against what the
    test believes is currently in use.
    """
    buffer_size = 100
    ring = SingleWriterShmRingBuffer(data_buffer_size=buffer_size, create=True)

    # Shadow bookkeeping, used only for assertions.
    allocated_bitmap = np.zeros((buffer_size,), dtype=np.bool_)  # offset -> in use
    allocation_map: dict[int, tuple[int, int]] = {}  # monotonic_id -> (offset, size)

    def count_allocated(bitmap) -> int:
        """Return how many bytes are flagged as in use in `bitmap`."""
        return np.sum(bitmap).item()

    def is_free_fn(a, b) -> bool:
        """Predicate handed to `free_buf`; treats every buffer as freeable."""
        return True

    def mark_allocated_with_assertion(monotonic_id, addr, size):
        """Record a new allocation, asserting that its byte range was unused."""
        # The ring may report addresses >= buffer_size; fold them into the bitmap.
        offset = addr % buffer_size
        # A slice past the end would be truncated silently by numpy and hide
        # an out-of-bounds allocation, so check the bound explicitly.
        self.assertLessEqual(offset + size, buffer_size)
        self.assertEqual(count_allocated(allocated_bitmap[offset : offset + size]), 0)
        allocated_bitmap[offset : offset + size] = True
        allocation_map[monotonic_id] = (offset, size)

    def mark_freed_with_assertion(monotonic_id):
        """Forget an allocation, asserting that its whole range was in use."""
        self.assertIn(monotonic_id, allocation_map)
        offset, size = allocation_map.pop(monotonic_id)
        offset = offset % buffer_size
        self.assertEqual(
            count_allocated(allocated_bitmap[offset : offset + size]), size
        )
        allocated_bitmap[offset : offset + size] = False

    def ring_free(free_size=None):
        """Ask the ring to free buffers and mirror every id it reports as freed."""
        for freed_id in ring.free_buf(is_free_fn, free_size):
            mark_freed_with_assertion(freed_id)

    def ring_allocate(allocate_size):
        """Allocate `allocate_size` bytes, freeing space and retrying once if full."""
        allocate_size_with_md = allocate_size + ring.MD_SIZE
        try:
            addr, monotonic_id = ring.allocate_buf(allocate_size)
        except MemoryError:
            # free 2x size for enough space if wrapping happened
            ring_free(allocate_size_with_md * 2)
            # retry allocating; a second MemoryError fails the test
            addr, monotonic_id = ring.allocate_buf(allocate_size)
        mark_allocated_with_assertion(monotonic_id, addr, allocate_size_with_md)

    # 1. allocation & free cycles
    for _ in range(33):
        # each allocation consumes 2 payload bytes + ring.MD_SIZE bytes
        # (10 bytes in total if MD_SIZE is 8, as originally noted)
        ring_allocate(2)

    # 2. free all allocations
    ring_free()

    # 3. try allocate the largest possible buffer
    ring_allocate(buffer_size - ring.MD_SIZE)
def main():
"""Main function demonstrating usage and running tests"""

View File

@ -127,9 +127,7 @@ class SingleWriterShmRingBuffer:
if create:
# we are creating a buffer
self.metadata = {
self.monotonic_id_end: self.data_buffer_end
} # monotonic_id -> start address
self.metadata: dict[int, int] = {} # monotonic_id -> start address
self.shared_memory = shared_memory.SharedMemory(
create=True, size=self.data_buffer_size, name=name
)
@ -288,7 +286,15 @@ class SingleWriterShmRingBuffer:
self.monotonic_id_start = (
self.monotonic_id_start + 1
) % self.ID_MAX
self.data_buffer_start = address
if self.monotonic_id_start in self.metadata:
# pointing to the start addr of next allocation
self.data_buffer_start += (
self.metadata[self.monotonic_id_start]
- self.data_buffer_start
) % self.data_buffer_size
else:
# no remaining allocation, reset to zero
self.data_buffer_start = self.data_buffer_end = 0
freed_bytes += metadata[1]
else:
# there are still readers, we cannot free the buffer