[Doc]: fixing typos in diverse files (#29492)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
Didier Durand 2025-11-27 16:15:50 +01:00 committed by GitHub
parent bab438ff3e
commit 66d3d5422c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 12 additions and 10 deletions

View File

@ -1005,7 +1005,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
help="Key-value pairs (e.g., --header x-additional-info=0.3.3) " help="Key-value pairs (e.g., --header x-additional-info=0.3.3) "
"for headers to be passed with each request. These headers override " "for headers to be passed with each request. These headers override "
"per backend constants and values set via environment variable, and " "per backend constants and values set via environment variable, and "
"will be overriden by other arguments (such as request ids).", "will be overridden by other arguments (such as request ids).",
) )
parser.add_argument( parser.add_argument(
"--max-concurrency", "--max-concurrency",
@ -1138,7 +1138,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--percentile-metrics", "--percentile-metrics",
type=str, type=str,
default=None, default=None,
help="Comma-separated list of selected metrics to report percentils. " help="Comma-separated list of selected metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. " "This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". ' 'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'If not specified, defaults to "ttft,tpot,itl" for generative models ' 'If not specified, defaults to "ttft,tpot,itl" for generative models '

View File

@ -238,9 +238,9 @@ class ParallelConfig:
cp_kv_cache_interleave_size: int = 1 cp_kv_cache_interleave_size: int = 1
"""Interleave size of kv_cache storage while using DCP or PCP. """Interleave size of kv_cache storage while using DCP or PCP.
For `total_cp_rank = pcp_rank * dcp_world_size + dcp_rank`, For `total_cp_rank = pcp_rank * dcp_world_size + dcp_rank`,
and `total_cp_world_size = pcp_world_size * dcp_world_szie`. and `total_cp_world_size = pcp_world_size * dcp_world_size`.
store interleave_size tokens on total_cp_rank i, store interleave_size tokens on total_cp_rank i,
then store next interleave_size tokens on taotal_cp_rank i+1. then store next interleave_size tokens on total_cp_rank i+1.
Interleave_size=1: token-level alignment, where token `i` is stored on Interleave_size=1: token-level alignment, where token `i` is stored on
total_cp_rank `i % total_cp_world_size`. total_cp_rank `i % total_cp_world_size`.
Interleave_size=block_size: block-level alignment, where tokens are Interleave_size=block_size: block-level alignment, where tokens are

View File

@ -173,7 +173,7 @@ class PunicaWrapperBase(PunicaWrapperABC):
vocab_size: int, vocab_size: int,
): ):
# NOTE We have removed lora extra vocab support for now. So we set # NOTE We have removed lora extra vocab support for now. So we set
# extra_vocab_size alwayzs to 0, and extra_vocab_size will be removed. # extra_vocab_size always to 0, and extra_vocab_size will be removed.
extra_vocab_size = 0 extra_vocab_size = 0
( (

View File

@ -181,7 +181,7 @@ def apply_top_k_top_p(
after thresholding the logit using this cut-off, the remaining elements after thresholding the logit using this cut-off, the remaining elements
shall constitute the top-p set. shall constitute the top-p set.
Note: in the case of tie (i.e. multipple cut-off elements present in the Note: in the case of tie (i.e. multiple cut-off elements present in the
logit), all tie elements are included in the top-p set. In other words, logit), all tie elements are included in the top-p set. In other words,
this function does not break ties. Instead, these tie tokens have equal this function does not break ties. Instead, these tie tokens have equal
chance of being chosen during final sampling, so we can consider the tie chance of being chosen during final sampling, so we can consider the tie

View File

@ -24,12 +24,14 @@ def _get_device_and_group(parallel_config: ParallelConfig):
device = get_dp_group().device device = get_dp_group().device
group = get_dp_group().device_group group = get_dp_group().device_group
# Transfering this tensor from GPU to CPU will introduce a GPU sync # Transferring this tensor from GPU to CPU will introduce a GPU sync
# point that could adversely affect performance of vllm with async # point that could adversely affect performance of vllm with async
# scheduling. This environment variable exists to quickly disable # scheduling. This environment variable exists to quickly disable
# this optimization if we run into this case. # this optimization if we run into this case.
if parallel_config.disable_nccl_for_dp_synchronization: if parallel_config.disable_nccl_for_dp_synchronization:
logger.info_once("Using CPU all reduce to syncronize DP padding between ranks.") logger.info_once(
"Using CPU all reduce to synchronize DP padding between ranks."
)
device = "cpu" device = "cpu"
group = get_dp_group().cpu_group group = get_dp_group().cpu_group
return device, group return device, group