[Docs] Fix warnings in mkdocs build (continued) (#25042)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Wenlong Wang 2025-09-20 04:45:18 -07:00 committed by yewentao256
parent c2fdc71c91
commit dad5f4d16d
7 changed files with 24 additions and 15 deletions


@@ -15,7 +15,7 @@ is used by model runners to dispatch data processing according to the target
 model.

 Info:
-    [mm_processing](../../../design/mm_processing.html)
+    [mm_processing](../../../design/mm_processing.md)
 """

 __all__ = [
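Note: the warning fixed here comes from mkdocs link validation, which resolves relative links against the documentation sources and flags targets it cannot find. A minimal illustration of the pattern (the docstring content is a hypothetical example, not the actual module):

"""Example module docstring following the pattern applied above.

Info:
    [mm_processing](../../../design/mm_processing.md)
"""
# Pointing at the .md source (rather than the rendered .html) lets
# `mkdocs build` resolve and validate the cross-reference at build time.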


@@ -3216,7 +3216,7 @@ def cprofile_context(save_file: Optional[str] = None):
     Args:
         save_file: path to save the profile result. "1" or
             None will result in printing to stdout.
     """
     import cProfile
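For reference, a minimal sketch of a context manager with the documented behavior, using only the standard library (the output formatting and the exact handling of "1" are illustrative assumptions, not vLLM's implementation):

import cProfile
import pstats
from contextlib import contextmanager
from typing import Optional

@contextmanager
def cprofile_context(save_file: Optional[str] = None):
    # Profile the enclosed block; "1" or None prints stats to stdout,
    # any other value is treated as a path for dump_stats().
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        yield
    finally:
        profiler.disable()
        if save_file and save_file != "1":
            profiler.dump_stats(save_file)
        else:
            pstats.Stats(profiler).sort_stats("cumulative").print_stats(20)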
@ -3273,7 +3273,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
and getattr(cfg.attn_config, "alibi", False))))) and getattr(cfg.attn_config, "alibi", False)))))
def sha256(input) -> bytes: def sha256(input: Any) -> bytes:
"""Hash any picklable Python object using SHA-256. """Hash any picklable Python object using SHA-256.
The input is serialized using pickle before hashing, which allows The input is serialized using pickle before hashing, which allows
@@ -3290,7 +3290,7 @@ def sha256(input) -> bytes:
     return hashlib.sha256(input_bytes).digest()


-def sha256_cbor(input) -> bytes:
+def sha256_cbor(input: Any) -> bytes:
     """
     Hash objects using CBOR serialization and SHA-256.
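Both helpers serialize before digesting. A self-contained sketch consistent with the docstrings and the `hashlib.sha256(input_bytes).digest()` line above (the pickle protocol choice and the `cbor2` package are assumptions):

import hashlib
import pickle
from typing import Any

import cbor2  # third-party; assumed available

def sha256(input: Any) -> bytes:
    # Pickle turns any picklable object into a byte stream for hashing.
    input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
    return hashlib.sha256(input_bytes).digest()

def sha256_cbor(input: Any) -> bytes:
    # CBOR is a language-independent serialization, so this digest is
    # reproducible across processes, unlike pickle-based hashing.
    return hashlib.sha256(cbor2.dumps(input)).digest()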


@@ -1230,7 +1230,7 @@ def get_kv_cache_configs(vllm_config: VllmConfig,
         vllm_config: The global VllmConfig
         kv_cache_specs: List of dict[layer_name, KVCacheSpec] for each worker.
         available_memory: Memory available for KV cache in bytes for each
             worker.

     Returns:
         The generated KVCacheConfigs for each worker.


@@ -351,17 +351,17 @@ def generate_uniform_probs(
     without a seed.

     Args:
-        num_tokens : int
+        num_tokens: int
             Total number of tokens.
-        num_draft_tokens : List[List[int]]
+        num_draft_tokens: List[List[int]]
             Number of draft tokens per request.
-        generators : Optional[Dict[int, torch.Generator]]
+        generators: Optional[Dict[int, torch.Generator]]
             A dictionary mapping indices in the batch to
             `torch.Generator` objects.
-        device : torch.device
+        device: torch.device
             The device on which to allocate the tensor.

     Returns:
-        uniform_rand : torch.Tensor
+        uniform_rand: torch.Tensor
             A tensor of shape `(num_tokens, )` containing uniform
             random values in the range [0, 1).
     """


@@ -1479,7 +1479,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         Args:
             scheduler_output: The scheduler output containing scheduled encoder
                 inputs.

         Returns:
             A tuple of (mm_kwargs, req_ids_pos) where:


@ -205,7 +205,8 @@ def gather_mm_placeholders(
""" """
Reconstructs the embeddings from the placeholder tokens. Reconstructs the embeddings from the placeholder tokens.
This is the operation of [scatter_mm_placeholders][]. This is the operation of [`scatter_mm_placeholders`]
[vllm.v1.worker.utils.scatter_mm_placeholders].
""" """
if is_embed is None: if is_embed is None:
return placeholders return placeholders
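A minimal sketch of the gather/scatter pair implied by the docstring, assuming `is_embed` is a boolean row mask (the real scatter may use a different fill value for non-embedding rows):

from typing import Optional
import torch

def scatter_mm_placeholders(
    embeds: torch.Tensor,
    is_embed: Optional[torch.Tensor],
) -> torch.Tensor:
    # Spread embeddings out to their placeholder positions; rows that are
    # not real embeddings stay zero-filled here (an assumption).
    if is_embed is None:
        return embeds
    placeholders = embeds.new_zeros(is_embed.shape[0], embeds.shape[-1])
    placeholders[is_embed] = embeds
    return placeholders

def gather_mm_placeholders(
    placeholders: torch.Tensor,
    is_embed: Optional[torch.Tensor],
) -> torch.Tensor:
    # Inverse of scatter: keep only the rows that hold real embeddings.
    if is_embed is None:
        return placeholders
    return placeholders[is_embed]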


@@ -1810,7 +1810,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
         return [output]

-    def need_recv_kv(self, model_input, kv_caches) -> bool:
+    def need_recv_kv(self, model_input: ModelInputForGPUWithSamplingMetadata,
+                     kv_caches: List[torch.Tensor]) -> bool:
         """Check if we need to receive kv-cache from the other worker.

         We need to receive KV when
             1. current vLLM instance is KV cache consumer/decode vLLM instance
@@ -1825,6 +1826,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
         if self.vllm_config.kv_transfer_config is None:
             return False

+        if model_input.attn_metadata is None:
+            raise ValueError("model_input.attn_metadata cannot be None")
+
         prefill_meta = model_input.attn_metadata.prefill_metadata

         # check if the current run is profiling
@ -1835,7 +1839,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
return self.vllm_config.kv_transfer_config.is_kv_consumer and ( return self.vllm_config.kv_transfer_config.is_kv_consumer and (
not is_profile_run) and is_prefill_run not is_profile_run) and is_prefill_run
def need_send_kv(self, model_input, kv_caches) -> bool: def need_send_kv(self, model_input: ModelInputForGPUWithSamplingMetadata,
kv_caches: List[torch.Tensor]) -> bool:
"""Check if we need to send kv-cache to the other worker. """Check if we need to send kv-cache to the other worker.
We need to send KV when We need to send KV when
1. current vLLM instance is KV cache producer/prefill vLLM instance 1. current vLLM instance is KV cache producer/prefill vLLM instance
@@ -1850,6 +1855,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
         if self.vllm_config.kv_transfer_config is None:
             return False

+        if model_input.attn_metadata is None:
+            raise ValueError("model_input.attn_metadata cannot be None")
+
         prefill_meta = model_input.attn_metadata.prefill_metadata

         # check if the current run is profiling
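Both predicates share one shape, condensed in the sketch below. The stand-in types are assumptions reduced to the fields the checks consult; `is_kv_producer` on the send side is inferred, since only `is_kv_consumer` appears in these hunks:

from dataclasses import dataclass
from typing import Optional

@dataclass
class KVTransferConfig:  # stand-in for the vLLM config type
    is_kv_consumer: bool
    is_kv_producer: bool

@dataclass
class AttnMetadata:  # stand-in; only prefill_metadata is consulted
    prefill_metadata: Optional[object]

def kv_transfer_needed(kv_transfer_config: Optional[KVTransferConfig],
                       attn_metadata: Optional[AttnMetadata],
                       is_profile_run: bool,
                       receiving: bool) -> bool:
    # The consumer (decode) side receives, the producer (prefill) side
    # sends, and only real (non-profiling) prefill runs move KV at all.
    if kv_transfer_config is None:
        return False
    if attn_metadata is None:
        raise ValueError("attn_metadata cannot be None")
    is_prefill_run = attn_metadata.prefill_metadata is not None
    role_ok = (kv_transfer_config.is_kv_consumer if receiving
               else kv_transfer_config.is_kv_producer)
    return role_ok and not is_profile_run and is_prefill_run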