diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index 10261aa423c01..2fb79f57a67f1 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -540,7 +540,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, Unlike in LLaVA-1.5, the number of image tokens inputted to the language model depends on the original size of the input image. Including the original image token in the input, the required number of image tokens - is given by {func}`get_llava_next_image_feature_size`. + is given by [get_llava_next_image_feature_size][]. This way, the `positions` and `attn_metadata` are consistent with the `input_ids`. diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 320a26f375557..f56110d94ab28 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -387,7 +387,7 @@ _M = TypeVar("_M", bound=Union[_HasModalityAttr, _HasModalityProp]) def full_groupby_modality(values: Iterable[_M]) -> ItemsView[str, list[_M]]: - """Convenience function to apply {func}`full_groupby` based on modality.""" + """Convenience function to apply [full_groupby][] based on modality.""" return full_groupby(values, key=lambda x: x.modality) diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 20284b4e1801f..646faa944565d 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -157,7 +157,7 @@ class Platform: return self._enum == PlatformEnum.OOT def is_cuda_alike(self) -> bool: - """Stateless version of {func}`torch.cuda.is_available`.""" + """Stateless version of [torch.cuda.is_available][].""" return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM) def is_sleep_mode_available(self) -> bool: @@ -194,7 +194,7 @@ class Platform: cls, device_id: int = 0, ) -> Optional[DeviceCapability]: - """Stateless version of {func}`torch.cuda.get_device_capability`.""" + """Stateless version of [torch.cuda.get_device_capability][].""" return None @classmethod diff --git a/vllm/sequence.py b/vllm/sequence.py index f3dfd32d9169e..e9212a82506e3 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -27,7 +27,7 @@ VLLM_INVALID_TOKEN_ID = -1 def array_full(token_id: int, count: int): - """{class}`array` equivalent of {func}`numpy.full`.""" + """{class}`array` equivalent of [numpy.full][].""" return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 267754036b317..28503a0a926d9 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -66,7 +66,7 @@ def gather_mm_placeholders( """ Reconstructs the embeddings from the placeholder tokens. - This is the operation of {func}`scatter_mm_placeholders`. + This is the operation of [scatter_mm_placeholders][]. """ if is_embed is None: return placeholders