[Doc] Fix cross-reference warnings (#25058)

Signed-off-by: Punit Vara <punitvara@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Punitvara 2025-09-18 14:35:16 +05:30 committed by GitHub
parent aa3f105c59
commit 05b044e698
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 18 additions and 14 deletions

View File

@ -171,7 +171,8 @@ class BenchmarkDataset(ABC):
If `None`, LoRA is not used. If `None`, LoRA is not used.
Returns: Returns:
A new [LoRARequest][] (or `None` if not applicable). A new [`LoRARequest`][vllm.lora.request.LoRARequest]
(or `None` if not applicable).
""" """
if max_loras is None or lora_path is None: if max_loras is None or lora_path is None:
return None return None

View File

@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer:
- Maintains metadata for each allocated buffer chunk in the writer process - Maintains metadata for each allocated buffer chunk in the writer process
- Supports custom "is_free_fn" functions to determine when buffers can be - Supports custom "is_free_fn" functions to determine when buffers can be
reused reused
- Each buffer chunk contains: [4-byte id][4-byte size][actual_data] - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]`
Key Concepts: Key Concepts:
- monotonic_id_start/end: Track the range of active buffer IDs - monotonic_id_start/end: Track the range of active buffer IDs
@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer:
- Writer handles garbage collection (free_buf) based on reader feedback - Writer handles garbage collection (free_buf) based on reader feedback
Memory Layout per Buffer Chunk: Memory Layout per Buffer Chunk:
[4-byte monotonic_id][4-byte chunk_size][actual_data...] `[4-byte monotonic_id][4-byte chunk_size][actual_data...]`
^metadata_start ^data_start ^metadata_start ^data_start
The monotonic_id ensures data integrity - readers can verify they're The monotonic_id ensures data integrity - readers can verify they're
@ -185,7 +185,7 @@ class SingleWriterShmRingBuffer:
''' '''
Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory. Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory.
Memory layout: Memory layout:
[4-byte monotonic_id][4-byte size][buffer data...] `[4-byte monotonic_id][4-byte size][buffer data...]`
''' '''
assert self.is_writer, "Only the writer can allocate buffers." assert self.is_writer, "Only the writer can allocate buffers."
assert size > 0, "Size must be greater than 0" assert size > 0, "Size must be greater than 0"
@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage:
allocation allocation
Memory Layout per Object: Memory Layout per Object:
[4-byte reference_count][metadata_size][serialized_object_data] `[4-byte reference_count][metadata_size][serialized_object_data]`
Thread Safety: Thread Safety:
- Writer operations (put, clear) are single-threaded by design - Writer operations (put, clear) are single-threaded by design

View File

@ -927,11 +927,13 @@ def causal_conv1d_update(
validate_data=False, validate_data=False,
): ):
""" """
x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim) x: Input tensor which can take the following shapes:
[shape=2: single token prediction]
[shape=3: single or multiple tokens prediction] - `[batch, dim]` - single token prediction
[shape=2 with num_tokens: continuous batching, where num_tokens is the - `[batch, dim, seqlen]` - single or multiple tokens prediction
total tokens of all sequences in that batch] - `[num_tokens, dim]` - continuous batching, where num_tokens is
the total tokens of all sequences in that batch
conv_state: (..., dim, state_len), where state_len >= width - 1 conv_state: (..., dim, state_len), where state_len >= width - 1
weight: (dim, width) weight: (dim, width)
bias: (dim,) bias: (dim,)

View File

@ -583,7 +583,7 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
inputs_embeds: Optional tensor of input embeddings. inputs_embeds: Optional tensor of input embeddings.
Info: Info:
[Mistral3ImagePixelInputs][] [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs]
""" """
if intermediate_tensors is not None: if intermediate_tensors is not None:
inputs_embeds = None inputs_embeds = None

View File

@ -301,7 +301,7 @@ class MultiModalProfiler(Generic[_I]):
Returns the maximum length of the multimodal (image placeholders+text) Returns the maximum length of the multimodal (image placeholders+text)
tokens, including any break/text tokens in-between image embeddings. tokens, including any break/text tokens in-between image embeddings.
<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end> `<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>`
Returns 9, even when the number of image embeddings is 6. Returns 9, even when the number of image embeddings is 6.
This is important to take into account when profiling and This is important to take into account when profiling and

View File

@ -24,8 +24,9 @@ class KVCacheBlocks:
""" """
blocks: tuple[list[KVCacheBlock], ...] blocks: tuple[list[KVCacheBlock], ...]
""" """
blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens. `blocks[i][j]` refers to the i-th kv_cache_group
We don't use block of tokens as the outer dimension because it assumes all and the j-th block of tokens. We don't use block of
tokens as the outer dimension because it assumes all
kv_cache_groups have the same number of blocks, which is true for now but kv_cache_groups have the same number of blocks, which is true for now but
will be broken if we want to give different block_size to different will be broken if we want to give different block_size to different
kv_cache_groups in the future. kv_cache_groups in the future.