[Doc] Fix cross-reference warnings (#25058)

Signed-off-by: Punit Vara <punitvara@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Punitvara 2025-09-18 14:35:16 +05:30 committed by GitHub
parent aa3f105c59
commit 05b044e698
6 changed files with 18 additions and 14 deletions


@@ -171,7 +171,8 @@ class BenchmarkDataset(ABC):
If `None`, LoRA is not used.
Returns:
- A new [LoRARequest][] (or `None` if not applicable).
+ A new [`LoRARequest`][vllm.lora.request.LoRARequest]
+     (or `None` if not applicable).
"""
if max_loras is None or lora_path is None:
return None
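For context on the pattern this commit applies throughout: in mkdocstrings syntax, a bare reference like `[LoRARequest][]` uses the bracketed text itself as the link target and warns when that short name cannot be resolved, whereas the fixed form spells out the full dotted path (`vllm.lora.request.LoRARequest`) so the link always resolves. A minimal docstring sketch of the convention; the function below is hypothetical, only the bracket syntax mirrors the real code:

```python
def maybe_lora_request(lora_path: str | None):
    """Build a request for an optional LoRA adapter (illustrative only).

    Returns:
        A new [`LoRARequest`][vllm.lora.request.LoRARequest]
        (or `None` if not applicable).
    """
    if lora_path is None:
        return None
```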


@@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer:
- Maintains metadata for each allocated buffer chunk in the writer process
- Supports custom "is_free_fn" functions to determine when buffers can be
reused
- - Each buffer chunk contains: [4-byte id][4-byte size][actual_data]
+ - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]`
Key Concepts:
- monotonic_id_start/end: Track the range of active buffer IDs
@@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer:
- Writer handles garbage collection (free_buf) based on reader feedback
Memory Layout per Buffer Chunk:
- [4-byte monotonic_id][4-byte chunk_size][actual_data...]
+ `[4-byte monotonic_id][4-byte chunk_size][actual_data...]`
^metadata_start ^data_start
The monotonic_id ensures data integrity - readers can verify they're
@@ -185,7 +185,7 @@ class SingleWriterShmRingBuffer:
'''
Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory.
Memory layout:
- [4-byte monotonic_id][4-byte size][buffer data...]
+ `[4-byte monotonic_id][4-byte size][buffer data...]`
'''
assert self.is_writer, "Only the writer can allocate buffers."
assert size > 0, "Size must be greater than 0"
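The chunk layout described in these docstrings is easy to picture with a few lines of `struct` packing. A minimal sketch, assuming a little-endian `<ii` header and hypothetical helper names; the real SingleWriterShmRingBuffer additionally tracks free/used chunks and ring wrap-around:

```python
import struct
from multiprocessing import shared_memory

MD_SIZE = struct.calcsize("<ii")  # 4-byte monotonic_id + 4-byte size

def write_chunk(buf, offset: int, monotonic_id: int, data: bytes) -> int:
    """Write [id][size][data] at `offset`; return the offset just past the chunk."""
    struct.pack_into("<ii", buf, offset, monotonic_id, len(data))
    data_start = offset + MD_SIZE
    buf[data_start:data_start + len(data)] = data
    return data_start + len(data)

def read_chunk(buf, offset: int) -> tuple[int, bytes]:
    """Read (monotonic_id, data) back from a chunk written at `offset`."""
    monotonic_id, size = struct.unpack_from("<ii", buf, offset)
    data_start = offset + MD_SIZE
    return monotonic_id, bytes(buf[data_start:data_start + size])

shm = shared_memory.SharedMemory(create=True, size=64)
write_chunk(shm.buf, 0, 7, b"hello")
print(read_chunk(shm.buf, 0))  # (7, b'hello')
shm.close()
shm.unlink()
```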
@@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage:
allocation
Memory Layout per Object:
- [4-byte reference_count][metadata_size][serialized_object_data]
+ `[4-byte reference_count][metadata_size][serialized_object_data]`
Thread Safety:
- Writer operations (put, clear) are single-threaded by design
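A 4-byte reference count leads each stored object. A toy sketch of reading and bumping that field in place, assuming a `<i` encoding and a hypothetical helper name; how the real SingleWriterShmObjectStorage synchronizes these updates between processes is out of scope here:

```python
import struct

def incr_ref(buf, obj_offset: int) -> int:
    """Bump the leading 4-byte reference_count of the object at obj_offset."""
    (count,) = struct.unpack_from("<i", buf, obj_offset)
    struct.pack_into("<i", buf, obj_offset, count + 1)
    return count + 1

buf = bytearray(struct.pack("<i", 0) + b"<metadata><serialized object bytes>")
print(incr_ref(buf, 0))  # 1
```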


@@ -927,11 +927,13 @@ def causal_conv1d_update(
validate_data=False,
):
"""
- x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim)
-     [shape=2: single token prediction]
-     [shape=3: single or multiple tokens prediction]
-     [shape=2 with num_tokens: continuous batching, where num_tokens is the
-     total tokens of all sequences in that batch]
+ x: Input tensor which can take the following shapes:
+     - `[batch, dim]` - single token prediction
+     - `[batch, dim, seqlen]` - single or multiple tokens prediction
+     - `[num_tokens, dim]` - continuous batching, where num_tokens is
+       the total tokens of all sequences in that batch
conv_state: (..., dim, state_len), where state_len >= width - 1
weight: (dim, width)
bias: (dim,)
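A shape-only sketch of those conventions, with made-up sizes; nothing here calls the real `causal_conv1d_update` kernel, it just builds tensors in the documented layouts:

```python
import torch

batch, dim, seqlen, width, state_len = 2, 8, 3, 4, 4  # state_len >= width - 1

x_single = torch.randn(batch, dim)         # [batch, dim]: one new token per sequence
x_multi = torch.randn(batch, dim, seqlen)  # [batch, dim, seqlen]: several tokens per sequence
x_varlen = torch.randn(5, dim)             # [num_tokens, dim]: continuous batching, 5 tokens total

conv_state = torch.randn(batch, dim, state_len)  # (..., dim, state_len)
weight = torch.randn(dim, width)                 # (dim, width)
bias = torch.randn(dim)                          # (dim,)
```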


@@ -583,7 +583,7 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
inputs_embeds: Optional tensor of input embeddings.
Info:
- [Mistral3ImagePixelInputs][]
+ [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs]
"""
if intermediate_tensors is not None:
inputs_embeds = None


@@ -301,7 +301,7 @@ class MultiModalProfiler(Generic[_I]):
Returns the maximum length of the multimodal (image placeholders+text)
tokens, including any break/text tokens in-between image embeddings.
- <im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>
+ `<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>`
Returns 9, even when the number of image embeddings is 6.
This is important to take into account when profiling and
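The count in that example covers the whole placeholder sequence, not just the `[IMG]` embeddings; a toy illustration with plain strings standing in for the real tokens:

```python
tokens = ["<im_start>", "[IMG]", "[IMG]", "[IMG]",
          "<row_break>", "[IMG]", "[IMG]", "[IMG]", "<im_end>"]

image_embeddings = tokens.count("[IMG]")  # 6
max_mm_tokens = len(tokens)               # 9, the length the profiler reports
print(image_embeddings, max_mm_tokens)
```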


@@ -24,8 +24,9 @@ class KVCacheBlocks:
"""
blocks: tuple[list[KVCacheBlock], ...]
"""
- blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens.
- We don't use block of tokens as the outer dimension because it assumes all
+ `blocks[i][j]` refers to the i-th kv_cache_group
+ and the j-th block of tokens. We don't use block of
+ tokens as the outer dimension because it assumes all
kv_cache_groups have the same number of blocks, which is true for now but
will be broken if we want to give different block_size to different
kv_cache_groups in the future.
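A toy sketch of that indexing, with strings standing in for KVCacheBlock objects; keeping the kv_cache_group as the outer dimension leaves room for groups to own different numbers of blocks later:

```python
blocks = (
    ["g0_b0", "g0_b1", "g0_b2"],  # kv_cache_group 0
    ["g1_b0", "g1_b1"],           # kv_cache_group 1 could hold fewer blocks
)

# blocks[i][j]: i-th kv_cache_group, j-th block of tokens
print(blocks[0][2], blocks[1][1])
```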