Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-09 23:34:27 +08:00)
[Doc] Fix cross-reference warnings (#25058)
Signed-off-by: Punit Vara <punitvara@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent aa3f105c59
commit 05b044e698
@@ -171,7 +171,8 @@ class BenchmarkDataset(ABC):
             If `None`, LoRA is not used.
 
         Returns:
-            A new [LoRARequest][] (or `None` if not applicable).
+            A new [`LoRARequest`][vllm.lora.request.LoRARequest]
+            (or `None` if not applicable).
         """
         if max_loras is None or lora_path is None:
             return None
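The fix pattern above recurs throughout this commit: the shorthand `[Name][]` only resolves when mkdocstrings can find `Name` unqualified, so each reference is rewritten with its fully qualified dotted path. A minimal sketch of the style in a hypothetical function (only the `LoRARequest` path comes from the diff):

```python
from typing import Optional


def example_lora_helper(lora_path: Optional[str]) -> Optional[object]:
    """Hypothetical helper, shown only to illustrate the docstring style.

    Returns:
        A new [`LoRARequest`][vllm.lora.request.LoRARequest]
        (or `None` if not applicable).
    """
    # The fully qualified form above always resolves; a bare
    # [LoRARequest][] emits a cross-reference warning whenever the
    # name is not resolvable exactly as written.
    return None
```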
@@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer:
     - Maintains metadata for each allocated buffer chunk in the writer process
     - Supports custom "is_free_fn" functions to determine when buffers can be
       reused
-    - Each buffer chunk contains: [4-byte id][4-byte size][actual_data]
+    - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]`
 
     Key Concepts:
     - monotonic_id_start/end: Track the range of active buffer IDs
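For orientation, a toy sketch of the "is_free_fn" idea: the writer consults a caller-supplied predicate before reclaiming a chunk. The signature and the flag-byte convention below are assumptions for illustration, not vLLM's actual API.

```python
def reader_flag_is_clear(metadata: bytes) -> bool:
    # Assumption: byte 0 of a chunk's metadata is a reader-in-progress
    # flag; a zero means every reader has finished with the chunk.
    return metadata[0] == 0


def reusable_chunks(chunks: list[bytes], is_free_fn) -> list[int]:
    """Return indices of chunks the writer may safely reuse."""
    return [i for i, md in enumerate(chunks) if is_free_fn(md)]
```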
@@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer:
     - Writer handles garbage collection (free_buf) based on reader feedback
 
     Memory Layout per Buffer Chunk:
-    [4-byte monotonic_id][4-byte chunk_size][actual_data...]
+    `[4-byte monotonic_id][4-byte chunk_size][actual_data...]`
     ^metadata_start      ^data_start
 
     The monotonic_id ensures data integrity - readers can verify they're
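A self-contained sketch of that chunk layout using the struct module; the little-endian, unsigned 4-byte encoding is an assumption, only the field order comes from the docstring:

```python
import struct

HEADER = struct.Struct("<II")  # [4-byte monotonic_id][4-byte chunk_size]


def write_chunk(buf: bytearray, metadata_start: int,
                monotonic_id: int, data: bytes) -> int:
    """Write one chunk; return data_start (= metadata_start + 8)."""
    HEADER.pack_into(buf, metadata_start, monotonic_id, len(data))
    data_start = metadata_start + HEADER.size
    buf[data_start:data_start + len(data)] = data
    return data_start


def read_chunk(buf: bytes, metadata_start: int, expected_id: int) -> bytes:
    monotonic_id, size = HEADER.unpack_from(buf, metadata_start)
    # The monotonic id lets a reader detect a chunk the writer has
    # already reclaimed and overwritten with newer data.
    assert monotonic_id == expected_id, "stale chunk: buffer was reused"
    data_start = metadata_start + HEADER.size
    return bytes(buf[data_start:data_start + size])
```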
@@ -185,7 +185,7 @@ class SingleWriterShmRingBuffer:
         '''
         Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory.
         Memory layout:
-        [4-byte monotonic_id][4-byte size][buffer data...]
+        `[4-byte monotonic_id][4-byte size][buffer data...]`
         '''
         assert self.is_writer, "Only the writer can allocate buffers."
         assert size > 0, "Size must be greater than 0"
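Continuing that sketch: an allocator in this style reserves `MD_SIZE` + `size` bytes, stamps the header, and hands back the id plus the data offset. `MD_SIZE = 8` is inferred from the two 4-byte header fields; the class below is illustrative, not vLLM's implementation:

```python
import struct

MD_SIZE = 8  # inferred: 4-byte monotonic_id + 4-byte size


class TinyRing:
    """Toy single-writer allocator over a flat bytearray."""

    def __init__(self, capacity: int):
        self.buf = bytearray(capacity)
        self.write_ptr = 0
        self.monotonic_id = 0

    def alloc_buf(self, size: int) -> tuple[int, int]:
        """Reserve MD_SIZE + size bytes; return (id, data offset)."""
        assert size > 0, "Size must be greater than 0"
        start = self.write_ptr
        struct.pack_into("<II", self.buf, start, self.monotonic_id, size)
        self.write_ptr = start + MD_SIZE + size
        self.monotonic_id += 1
        return self.monotonic_id - 1, start + MD_SIZE
```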
@@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage:
       allocation
 
     Memory Layout per Object:
-    [4-byte reference_count][metadata_size][serialized_object_data]
+    `[4-byte reference_count][metadata_size][serialized_object_data]`
 
     Thread Safety:
     - Writer operations (put, clear) are single-threaded by design
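A toy rendering of that per-object layout. Pickle and the fixed-width count encoding are assumptions, and the real class stores metadata between the count and the payload, which this sketch elides:

```python
import pickle
import struct


def put_object(buf: bytearray, offset: int, obj: object) -> int:
    """Serialize obj at offset with an initial reference count of 1;
    return the offset just past the object."""
    payload = pickle.dumps(obj)
    struct.pack_into("<I", buf, offset, 1)  # [4-byte reference_count]
    buf[offset + 4:offset + 4 + len(payload)] = payload
    return offset + 4 + len(payload)


def incref(buf: bytearray, offset: int) -> int:
    """Readers bump the count; the writer reclaims only at zero."""
    (count,) = struct.unpack_from("<I", buf, offset)
    struct.pack_into("<I", buf, offset, count + 1)
    return count + 1
```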
@@ -927,11 +927,13 @@ def causal_conv1d_update(
     validate_data=False,
 ):
     """
-    x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim)
-        [shape=2: single token prediction]
-        [shape=3: single or multiple tokens prediction]
-        [shape=2 with num_tokens: continuous batching, where num_tokens is the
-        total tokens of all sequences in that batch]
+    x: Input tensor which can take the following shapes:
+
+        - `[batch, dim]` - single token prediction
+        - `[batch, dim, seqlen]` - single or multiple tokens prediction
+        - `[num_tokens, dim]` - continuous batching, where num_tokens is
+          the total tokens of all sequences in that batch
+
     conv_state: (..., dim, state_len), where state_len >= width - 1
     weight: (dim, width)
     bias: (dim,)
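To make the three accepted shapes concrete, dummy tensors with arbitrary sizes (dim=64, width=4, and so on are illustrative; the kernel itself is not invoked here):

```python
import torch

batch, dim, seqlen, width = 2, 64, 5, 4
num_tokens = 7  # total tokens across all sequences in the batch

x_single = torch.randn(batch, dim)          # [batch, dim]: one token per sequence
x_multi = torch.randn(batch, dim, seqlen)   # [batch, dim, seqlen]: several tokens
x_varlen = torch.randn(num_tokens, dim)     # [num_tokens, dim]: continuous batching

conv_state = torch.randn(batch, dim, width - 1)  # state_len >= width - 1
weight = torch.randn(dim, width)                 # (dim, width)
bias = torch.randn(dim)                          # (dim,)
```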
@@ -583,7 +583,7 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
             inputs_embeds: Optional tensor of input embeddings.
 
         Info:
-            [Mistral3ImagePixelInputs][]
+            [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs]
         """
         if intermediate_tensors is not None:
             inputs_embeds = None
@@ -301,7 +301,7 @@ class MultiModalProfiler(Generic[_I]):
         Returns the maximum length of the multimodal (image placeholders+text)
         tokens, including any break/text tokens in-between image embeddings.
 
-        <im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>
+        `<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>`
         Returns 9, even when the number of image embeddings is 6.
 
         This is important to take into account when profiling and
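The count in that example, worked out explicitly: the reported maximum spans every token from `<im_start>` through `<im_end>` inclusive, so the start/end and break tokens count alongside the image placeholders.

```python
tokens = ["<im_start>", "[IMG]", "[IMG]", "[IMG]",
          "<row_break>", "[IMG]", "[IMG]", "[IMG]", "<im_end>"]

assert len(tokens) == 9            # maximum multimodal length
assert tokens.count("[IMG]") == 6  # actual image embeddings
```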
@@ -24,8 +24,9 @@ class KVCacheBlocks:
     """
     blocks: tuple[list[KVCacheBlock], ...]
     """
-    blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens.
-    We don't use block of tokens as the outer dimension because it assumes all
+    `blocks[i][j]` refers to the i-th kv_cache_group
+    and the j-th block of tokens. We don't use block of
+    tokens as the outer dimension because it assumes all
     kv_cache_groups have the same number of blocks, which is true for now but
     will be broken if we want to give different block_size to different
     kv_cache_groups in the future.
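The indexing convention, sketched with plain ints standing in for KVCacheBlock objects (the group contents are made up):

```python
# Outer index: kv_cache_group; inner index: block of tokens.
blocks: tuple[list[int], ...] = (
    [101, 102, 103],  # group 0
    [201, 202],       # group 1: may hold a different number of blocks
)                     # once block sizes diverge between groups

third_block_of_group0 = blocks[0][2]  # -> 103
```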