From 05b044e698bb3c151871d94b64fabd87188de9ef Mon Sep 17 00:00:00 2001 From: Punitvara Date: Thu, 18 Sep 2025 14:35:16 +0530 Subject: [PATCH] [Doc] Fix cross-reference warnings (#25058) Signed-off-by: Punit Vara Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/benchmarks/datasets.py | 3 ++- .../device_communicators/shm_object_storage.py | 8 ++++---- .../model_executor/layers/mamba/ops/causal_conv1d.py | 12 +++++++----- vllm/model_executor/models/mistral3.py | 2 +- vllm/multimodal/profiling.py | 2 +- vllm/v1/core/kv_cache_manager.py | 5 +++-- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 1831539a6adb..1cab40802c39 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -171,7 +171,8 @@ class BenchmarkDataset(ABC): If `None`, LoRA is not used. Returns: - A new [LoRARequest][] (or `None` if not applicable). + A new [`LoRARequest`][vllm.lora.request.LoRARequest] + (or `None` if not applicable). 
""" if max_loras is None or lora_path is None: return None diff --git a/vllm/distributed/device_communicators/shm_object_storage.py b/vllm/distributed/device_communicators/shm_object_storage.py index 3fac104bda1e..352e7525d4c8 100644 --- a/vllm/distributed/device_communicators/shm_object_storage.py +++ b/vllm/distributed/device_communicators/shm_object_storage.py @@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer: - Maintains metadata for each allocated buffer chunk in the writer process - Supports custom "is_free_fn" functions to determine when buffers can be reused - - Each buffer chunk contains: [4-byte id][4-byte size][actual_data] + - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]` Key Concepts: - monotonic_id_start/end: Track the range of active buffer IDs @@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer: - Writer handles garbage collection (free_buf) based on reader feedback Memory Layout per Buffer Chunk: - [4-byte monotonic_id][4-byte chunk_size][actual_data...] + `[4-byte monotonic_id][4-byte chunk_size][actual_data...]` ^metadata_start ^data_start The monotonic_id ensures data integrity - readers can verify they're @@ -185,7 +185,7 @@ class SingleWriterShmRingBuffer: ''' Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory. Memory layout: - [4-byte monotonic_id][4-byte size][buffer data...] + `[4-byte monotonic_id][4-byte size][buffer data...]` ''' assert self.is_writer, "Only the writer can allocate buffers." 
assert size > 0, "Size must be greater than 0" @@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage: allocation Memory Layout per Object: - [4-byte reference_count][metadata_size][serialized_object_data] + `[4-byte reference_count][metadata_size][serialized_object_data]` Thread Safety: - Writer operations (put, clear) are single-threaded by design diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 7e3ea561fd29..2a88fa661da0 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -927,11 +927,13 @@ def causal_conv1d_update( validate_data=False, ): """ - x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim) - [shape=2: single token prediction] - [shape=3: single or multiple tokens prediction] - [shape=2 with num_tokens: continuous batching, where num_tokens is the - total tokens of all sequences in that batch] + x: Input tensor which can take the following shapes: + + - `[batch, dim]` - single token prediction + - `[batch, dim, seqlen]` - single or multiple tokens prediction + - `[num_tokens, dim]` - continuous batching, where num_tokens is + the total tokens of all sequences in that batch + conv_state: (..., dim, state_len), where state_len >= width - 1 weight: (dim, width) bias: (dim,) diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index 09479012a03a..d15776a39362 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -583,7 +583,7 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA, inputs_embeds: Optional tensor of input embeddings. 
Info: - [Mistral3ImagePixelInputs][] + [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs] """ if intermediate_tensors is not None: inputs_embeds = None diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index bad6c0c3d9db..fbbc55d3524c 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -301,7 +301,7 @@ class MultiModalProfiler(Generic[_I]): Returns the maximum length of the multimodal (image placeholders+text) tokens, including any break/text tokens in-between image embeddings. - [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] + ` [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] ` Returns 9, even when the number of image embeddings is 6. This is important to take into account when profiling and diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 3a0fbb5e5c41..401327f727a4 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -24,8 +24,9 @@ class KVCacheBlocks: """ blocks: tuple[list[KVCacheBlock], ...] """ - blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens. - We don't use block of tokens as the outer dimension because it assumes all + `blocks[i][j]` refers to the i-th kv_cache_group + and the j-th block of tokens. We don't use block of + tokens as the outer dimension because it assumes all kv_cache_groups have the same number of blocks, which is true for now but will be broken if we want to give different block_size to different kv_cache_groups in the future.