[Doc]: fix typos in Python comments (#24173)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Didier Durand 2025-09-04 17:52:17 +02:00 committed by GitHub
parent e41a0fa377
commit 83609ca91d
12 changed files with 13 additions and 13 deletions

@@ -403,7 +403,7 @@ class RandomDataset(BenchmarkDataset):
 # [6880, 6881] -> ['Ġcalls', 'here'] ->
 # [1650, 939, 486] -> ['Ġcall', 'sh', 'ere']
 # To avoid uncontrolled change of the prompt length,
-# the encoded sequence is truncated before being decode again.
+# the encoded sequence is truncated before being decoded again.
 total_input_len = prefix_len + int(input_lens[i])
 re_encoded_sequence = tokenizer.encode(prompt, add_special_tokens=False)[
 :total_input_len

@@ -962,7 +962,7 @@ async def main_mp(
 # At this point all the clients finished,
 # collect results (TTFT, TPOT, etc.) from all the clients.
-# This needs to happens before calling join on the clients
+# This needs to happen before calling join on the clients
 # (result_queue should be emptied).
 while not result_queue.empty():
 client_metrics.append(result_queue.get())

@@ -117,7 +117,7 @@ def run_gemma3n(question: str, audio_count: int) -> ModelRequestData:
 # Granite Speech
 def run_granite_speech(question: str, audio_count: int) -> ModelRequestData:
-# NOTE - the setting in this example are somehat different than what is
+# NOTE - the setting in this example are somewhat different from what is
 # optimal for granite speech, and it is generally recommended to use beam
 # search. Check the model README for suggested settings.
 # https://huggingface.co/ibm-granite/granite-speech-3.3-8b

@@ -250,7 +250,7 @@ def build_video_inputs_from_test_info(
 def apply_image_size_scaling(image, size: Union[float, tuple[int, int]],
 size_type: SizeType):
-"""Applies a size scaler to one image; this can be a an image size factor,
+"""Applies a size scaler to one image; this can be an image size factor,
 which scales the image while maintaining the aspect ratio"""
 # Special case for embeddings; if it's a tensor, it's only valid if we
 # are considering size factors at constant scale, i.e., we just clone

@@ -42,7 +42,7 @@ def get_filtered_test_settings(
 else:
 assert test_info.prompt_formatter is not None
-# Everything looks okay; keep if this is has correct proc handling
+# Everything looks okay; keep if this is correct proc handling
 if (test_info.distributed_executor_backend
 is not None) == new_proc_per_test:
 matching_tests[test_name] = test_info

@@ -822,7 +822,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[T], Generic[T]):
 and context_lens_tensor is not None \
 and context_lens_tensor[:self.num_prefills].max() > 0:
-# NOTE: it is recommend you read the `Chunked Prefill` section in
+# NOTE: it is recommended you read the `Chunked Prefill` section in
 # the comment at the top of the file before trying to understand
 # the following code

@@ -128,7 +128,7 @@ class QuantizationConfig(ABC):
 @staticmethod
 def get_from_keys_or(config: dict[str, Any], keys: list[str],
 default: Any) -> Any:
-"""Get a optional value from the model's quantization config."""
+"""Get an optional value from the model's quantization config."""
 try:
 return QuantizationConfig.get_from_keys(config, keys)
 except ValueError:

@@ -401,7 +401,7 @@ M = TypeVar("M", bound=MLACommonMetadata)
 def use_flashinfer_prefill() -> bool:
-# For blackwell default to flashinfer prefill if its available since
+# For blackwell default to flashinfer prefill if it's available since
 # it is faster than FA2.
 return (flashinfer_available and not envs.VLLM_USE_CUDNN_PREFILL
 and current_platform.is_device_capability(100))
@@ -1018,7 +1018,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
 return layer.weight
 # we currently do not have quantized bmm's which are needed for
-# `W_UV` and `W_UK_T`, we we just store fp16/bf16 copies and perform
+# `W_UV` and `W_UK_T`, we just store fp16/bf16 copies and perform
 # the bmm's in 16-bit, the extra memory overhead of this is fairly low
 kv_b_proj_weight = get_and_maybe_dequant_weights(self.kv_b_proj).T
 assert kv_b_proj_weight.shape == (