[Doc] Fix cross-reference warnings (#25058)

Signed-off-by: Punit Vara <punitvara@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-05-22 15:37:54 +08:00 · 2025-09-18 14:35:16 +05:30 · 2025-09-18 14:35:16 +05:30 · 05b044e698
commit 05b044e698
parent aa3f105c59
6 changed files with 18 additions and 14 deletions
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@ -171,7 +171,8 @@ class BenchmarkDataset(ABC):
                If `None`, LoRA is not used.
        Returns:
-            A new [LoRARequest][] (or `None` if not applicable).
+            A new [`LoRARequest`][vllm.lora.request.LoRARequest]
            (or `None` if not applicable).
        """
        if max_loras is None or lora_path is None:
            return None
--- a/vllm/distributed/device_communicators/shm_object_storage.py
+++ b/vllm/distributed/device_communicators/shm_object_storage.py
@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer:
    - Maintains metadata for each allocated buffer chunk in the writer process
    - Supports custom "is_free_fn" functions to determine when buffers can be
      reused
-    - Each buffer chunk contains: [4-byte id][4-byte size][actual_data]
+    - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]`
    Key Concepts:
    - monotonic_id_start/end: Track the range of active buffer IDs
@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer:
    - Writer handles garbage collection (free_buf) based on reader feedback
    Memory Layout per Buffer Chunk:
-    [4-byte monotonic_id][4-byte chunk_size][actual_data...]
+    `[4-byte monotonic_id][4-byte chunk_size][actual_data...]`
    ^metadata_start                         ^data_start
    The monotonic_id ensures data integrity - readers can verify they're
@ -185,7 +185,7 @@ class SingleWriterShmRingBuffer:
        '''
        Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory.
        Memory layout:
-        [4-byte monotonic_id][4-byte size][buffer data...]
+        `[4-byte monotonic_id][4-byte size][buffer data...]`
        '''
        assert self.is_writer, "Only the writer can allocate buffers."
        assert size > 0, "Size must be greater than 0"
@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage:
      allocation
    Memory Layout per Object:
-    [4-byte reference_count][metadata_size][serialized_object_data]
+    `[4-byte reference_count][metadata_size][serialized_object_data]`
    Thread Safety:
    - Writer operations (put, clear) are single-threaded by design
--- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
+++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
@ -927,11 +927,13 @@ def causal_conv1d_update(
    validate_data=False,
 ):
    """
-    x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim)
+    x: Input tensor which can take the following shapes:
-        [shape=2: single token prediction]
+
-        [shape=3: single or multiple tokens prediction]
+    - `[batch, dim]` - single token prediction
-        [shape=2 with num_tokens: continuous batching, where num_tokens is the
+    - `[batch, dim, seqlen]` - single or multiple tokens prediction
-                                  total tokens of all sequences in that batch]
+    - `[num_tokens, dim]` - continuous batching, where num_tokens is
        the total tokens of all sequences in that batch
    conv_state: (..., dim, state_len), where state_len >= width - 1
    weight: (dim, width)
    bias: (dim,)
--- a/vllm/model_executor/models/mistral3.py
+++ b/vllm/model_executor/models/mistral3.py
@ -583,7 +583,7 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
            inputs_embeds: Optional tensor of input embeddings.
        Info:
-            [Mistral3ImagePixelInputs][]
+            [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs]
        """
        if intermediate_tensors is not None:
            inputs_embeds = None
--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@ -301,7 +301,7 @@ class MultiModalProfiler(Generic[_I]):
        Returns the maximum length of the multimodal (image placeholders+text)
        tokens, including any break/text tokens in-between image embeddings.
-        <im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>
+        `<im_start> [IMG] [IMG] [IMG] <row_break> [IMG] [IMG] [IMG] <im_end>`
        Returns 9, even when the number of image embeddings is 6.
        This is important to take into account when profiling and
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@ -24,8 +24,9 @@ class KVCacheBlocks:
    """
    blocks: tuple[list[KVCacheBlock], ...]
    """
-    blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens.
+    `blocks[i][j]` refers to the i-th kv_cache_group
-    We don't use block of tokens as the outer dimension because it assumes all
+    and the j-th block of tokens. We don't use block of
    tokens as the outer dimension because it assumes all
    kv_cache_groups have the same number of blocks, which is true for now but 
    will be broken if we want to give different block_size to different 
    kv_cache_groups in the future.