[Doc]: fix typos in various files (#24726)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
Didier Durand 2025-09-12 15:43:12 +02:00 committed by GitHub
parent 0377802c20
commit bcb06d7baf
GPG Key ID: B5690EEEBB952194
11 changed files with 11 additions and 11 deletions

View File

@@ -56,7 +56,7 @@ def w8a8_block_matmul(
Bs: The per-block quantization scale for `B`.
block_size: The block size for per-block quantization.
It should be 2-dim, e.g., [128, 128].
-output_dytpe: The dtype of the returned tensor.
+output_dtype: The dtype of the returned tensor.
Returns:
torch.Tensor: The result of matmul.
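For context, the per-block scaling described in this docstring can be modeled in plain numpy. The sketch below is an illustrative dequantize-then-matmul reference, not the actual kernel; the shapes of A, B, As and Bs are assumptions for illustration (A: [M, K], B: [N, K], As: [M, K/block_k], Bs: [N/block_n, K/block_k]).

    import numpy as np

    def block_matmul_reference(A, B, As, Bs, block_size, output_dtype=np.float32):
        # Illustrative reference only: dequantize A and B with their per-block
        # scales, then run a plain matmul (A @ B.T).
        block_n, block_k = block_size
        # Broadcast each per-block scale of B over its [block_n, block_k] tile.
        Bs_full = np.kron(Bs, np.ones((block_n, block_k)))[: B.shape[0], : B.shape[1]]
        # A is scaled per group of block_k elements along its last dimension.
        As_full = np.repeat(As, block_k, axis=1)[:, : A.shape[1]]
        return ((A * As_full) @ (B * Bs_full).T).astype(output_dtype)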

View File

@@ -12,7 +12,7 @@ namespace vec_op {
#define vec_sub(a, b) ((a) - (b))
#define vec_mul(a, b) ((a) * (b))
#define vec_div(a, b) ((a) / (b))
-#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebaic
+#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
// FIXME: FP16 is not fully supported in Torch-CPU
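For readers unfamiliar with the term in the fixed comment: an algebraic (arithmetic) right shift propagates the sign bit, unlike a logical shift on the raw bit pattern. A quick Python illustration of the difference on a 32-bit value:

    x = -8
    print(x >> 1)                 # -4: arithmetic (algebraic) shift keeps the sign
    print((x & 0xFFFFFFFF) >> 1)  # 2147483644: logical shift on the 32-bit pattern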

View File

@@ -215,7 +215,7 @@ int moe_align_block_size(
offsets[mb + 1] = sorted_id_size(sorted_ids + mb * BLOCK_M);
}
});
-// TODO: do we need to vecterize this ?
+// TODO: do we need to vectorize this ?
for (int mb = 0; mb < num_token_blocks; ++mb) {
offsets[mb + 1] += offsets[mb];
}
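As a side note on the TODO above: the scalar loop is a cumulative sum over the per-block counts, so a vectorized equivalent (sketched here in Python/numpy rather than the kernel's C++, with illustrative values) would be:

    import numpy as np

    counts = np.array([3, 1, 4, 2])          # sorted-id count per token block (illustrative)
    offsets = np.zeros(len(counts) + 1, dtype=np.int64)
    offsets[1:] = np.cumsum(counts)          # same result as the serial offsets[mb + 1] += offsets[mb] loop
    # offsets is now [0, 3, 4, 8, 10]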

View File

@@ -8,7 +8,7 @@ page for information on known issues and how to solve them.
## Introduction
!!! important
-The source code references are to the state of the code at the time of writing in December, 2024.
+The source code references are to the state of the code at the time of writing in December 2024.
The use of Python multiprocessing in vLLM is complicated by:

View File

@@ -901,7 +901,7 @@ def _get_query_key_seq_metadata(
attn_metadata.encoder_seq_start_loc,
attn_metadata.max_encoder_seq_len)
elif attn_type == AttentionType.ENCODER:
-# For encoder attention both the query and the key are same i.e the
+# For encoder attention both the query and the key are same i.e. the
# encoder sequence.
return (attn_metadata.encoder_seq_start_loc,
attn_metadata.max_encoder_seq_len,

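For context, the seq_start_loc tensors returned here hold cumulative start offsets of the packed sequences. A small standalone illustration of how such offsets relate to sequence lengths (values are made up; this is a sketch, not vLLM's internal code):

    import torch

    encoder_seq_lens = torch.tensor([3, 5])   # two encoder sequences, illustrative
    encoder_seq_start_loc = torch.cat(
        [torch.zeros(1, dtype=torch.long), torch.cumsum(encoder_seq_lens, 0)])
    max_encoder_seq_len = int(encoder_seq_lens.max())
    # encoder_seq_start_loc -> tensor([0, 3, 8]); max_encoder_seq_len -> 5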
View File

@@ -551,7 +551,7 @@ class RandomDataset(BenchmarkDataset):
[6880, 6881] -> ['Ġcalls', 'here'] ->
[1650, 939, 486] -> ['Ġcall', 'sh', 'ere']
To avoid uncontrolled change of the prompt length,
-the encoded sequence is truncated before being decode again.
+the encoded sequence is truncated before being decoded again.
"""
# Build the inner sequence by sampling sequentially from the vocab
inner_seq = ((offset + index + np.arange(input_len))

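The length drift described in this docstring is easy to reproduce with a tokenizer. A minimal sketch (the GPT-2 tokenizer and the token ids are assumptions taken from the example above):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    ids = [6880, 6881]             # e.g. ['Ġcalls', 'here'] in the example above
    text = tok.decode(ids)
    re_ids = tok.encode(text)      # may come back with a different number of tokens
    # Truncating the encoded sequence to the target length before decoding again
    # keeps the prompt length from drifting.
    re_ids = re_ids[: len(ids)]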
View File

@@ -242,7 +242,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
elif processors:
raise ValueError(
"The `logits_processors` argument is not supported by this "
"server. See --logits-processor-pattern engine argugment "
"server. See --logits-processor-pattern engine argument "
"for more information.")
return None

View File

@@ -324,7 +324,7 @@ class MambaMixer2(MambaBase, CustomOp):
# - the weight already has a "weight_loader" attribute
# which set_weight_attrs will raise if we do not
# delete before trying to override it
-# - ditto for the otther two weights below
+# - ditto for the other two weights below
delattr(self.conv1d.bias, "weight_loader")
set_weight_attrs(
self.conv1d.bias,

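For context, the delete-then-override pattern the comment describes can be shown with a self-contained toy. The helper set_attr_once below is a hypothetical stand-in for set_weight_attrs, assuming (as the comment says) that it raises when the attribute is already present:

    import torch

    def set_attr_once(t, key, value):
        # Toy stand-in: refuses to overwrite an existing attribute, like set_weight_attrs.
        assert not hasattr(t, key), f"attribute {key!r} already set"
        setattr(t, key, value)

    bias = torch.nn.Parameter(torch.zeros(4))
    set_attr_once(bias, "weight_loader", "default_loader")
    # Overriding requires deleting the old attribute first:
    delattr(bias, "weight_loader")
    set_attr_once(bias, "weight_loader", "custom_loader")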
View File

@@ -1117,7 +1117,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
def _process_multimodal_inputs(self, modalities: dict):
# The result multimodal_embeddings is tuple of tensors, with each
-# tensor correspoending to a multimodal data item (image or video).
+# tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary

View File

@@ -2659,7 +2659,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
num_tokens += num_pad
# If cudagraph_mode.decode_mode() == FULL and
-# cudagraph_mode.seperate_routine(). This means that we are using
+# cudagraph_mode.separate_routine(). This means that we are using
# different graphs and/or modes for mixed prefill-decode batches vs.
# uniform decode batches. A uniform decode batch means that all
# requests have identical query length, except a potential virtual

View File

@@ -392,7 +392,7 @@ class InputBatch:
# NOTE: the following is unsafe
# self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\
# self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...]
-# instead, we need to temporiarily copy the data for one of the indices
+# instead, we need to temporarily copy the data for one of the indices
# TODO(lucas): optimize this by only copying valid indices
tmp = self.token_ids_cpu[i1, ...].copy()
self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...]
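For context, the tuple swap is unsafe because basic numpy indexing returns views, so the right-hand side is overwritten before it is read. A minimal standalone sketch of the failure and the copy-based fix:

    import numpy as np

    a = np.arange(6).reshape(2, 3)        # rows [0 1 2] and [3 4 5]
    a[0, ...], a[1, ...] = a[1, ...], a[0, ...]
    # -> both rows are now [3 4 5]: the view of row 0 was clobbered before being read

    b = np.arange(6).reshape(2, 3)
    tmp = b[0, ...].copy()                # temporarily copy one row
    b[0, ...] = b[1, ...]
    b[1, ...] = tmp                       # rows swapped correctly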