From 0235103cbbdb511e6708aae600f759060a797c16 Mon Sep 17 00:00:00 2001
From: Didier Durand <2927957+didier-durand@users.noreply.github.com>
Date: Tue, 2 Sep 2025 04:07:45 +0200
Subject: [PATCH] [Doc]: fix typos in Python comments (#24042)

Signed-off-by: Didier Durand
Co-authored-by: Jee Jee Li
---
 vllm/distributed/device_communicators/quick_all_reduce.py | 2 +-
 vllm/distributed/device_communicators/ray_communicator.py | 2 +-
 vllm/entrypoints/openai/run_batch.py | 2 +-
 vllm/entrypoints/openai/serving_responses.py | 2 +-
 vllm/executor/ray_utils.py | 2 +-
 .../model_executor/layers/quantization/utils/quant_utils.py | 2 +-
 vllm/model_executor/models/registry.py | 2 +-
 vllm/model_executor/sampling_metadata.py | 2 +-
 vllm/scalar_type.py | 2 +-
 vllm/sequence.py | 2 +-
 vllm/v1/core/sched/scheduler.py | 6 +++---
 vllm/v1/metrics/stats.py | 2 +-
 12 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/vllm/distributed/device_communicators/quick_all_reduce.py b/vllm/distributed/device_communicators/quick_all_reduce.py
index c61231e2d33f4..836241910e2fb 100644
--- a/vllm/distributed/device_communicators/quick_all_reduce.py
+++ b/vllm/distributed/device_communicators/quick_all_reduce.py
@@ -78,7 +78,7 @@ class QuickAllReduce:
             group: the process group to work on. If None, it will use the
                 default process group.
             device: the device to bind the CustomAllreduce to. If None,
-                it will be bind to f"cuda:{local_rank}".
+                it will be bound to f"cuda:{local_rank}".
         It is the caller's responsibility to make sure each communicator
         is bind to a unique device, and all communicators in this group
         are in the same node.
diff --git a/vllm/distributed/device_communicators/ray_communicator.py b/vllm/distributed/device_communicators/ray_communicator.py
index 46cc1c2f52d67..8cd8c459a9e51 100644
--- a/vllm/distributed/device_communicators/ray_communicator.py
+++ b/vllm/distributed/device_communicators/ray_communicator.py
@@ -186,7 +186,7 @@ class RayPPCommunicator(Communicator):
         """
         Receive a torch.Tensor from a peer and synchronize the current stream.
 
-        After this call returns, the receive buffer is safe to read from from
+        After this call returns, the receive buffer is safe to read from
        any stream. An RayChannelError will be raised if an error occurred
        (e.g., remote actor died), and the buffer is not safe to read.
 
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 01551a8c7f04a..fa813550e520c 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -161,7 +161,7 @@ async def write_local_file(output_path: str,
         batch_outputs: The list of batch outputs to write.
     """
     # We should make this async, but as long as run_batch runs as a
-    # standalone program, blocking the event loop won't effect performance.
+    # standalone program, blocking the event loop won't affect performance.
     with open(output_path, "w", encoding="utf-8") as f:
         for o in batch_outputs:
             print(o.model_dump_json(), file=f)
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 6a676cfe1b388..4c15de3030998 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -728,7 +728,7 @@ class OpenAIServingResponses(OpenAIServing):
             for response_msg in request.input:
                 messages.append(
                     parse_response_input(response_msg, prev_outputs))
-                # User passes in a a tool call request and its output. We need
+                # User passes in a tool call request and its output. We need
                 # to add the tool call request to prev_outputs so that the
                 # parse_response_input can find the tool call request when
                 # parsing the tool call output.
diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
index 4b2a15afb67a7..0bdeb28569892 100644
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -223,7 +223,7 @@ def _wait_until_pg_ready(current_placement_group: "PlacementGroup"):
 
     """
     # Wait until PG is ready - this will block until all
-    # requested resources are available, and will timeout
+    # requested resources are available, and will time out
     # if they cannot be provisioned.
     placement_group_specs = current_placement_group.bundle_specs
 
diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index 6154fca2e416d..f4ff875adb21c 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -116,7 +116,7 @@ def _normalize_quant_group_shape(x: torch.Tensor, group_shape: GroupShape):
 # then we would expand a to:
 # a = [[1, 1, 2, 2],
 #      [3, 3, 4, 4]]
-# NOTE this function this function does not explicitly broadcast dimensions
+# NOTE this function does not explicitly broadcast dimensions
 # with an extent of 1, since this can be done implicitly by pytorch
 def group_broadcast(t, shape):
     for i, s in enumerate(shape):
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index edb7f24214406..f236040bb2341 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -185,7 +185,7 @@ _EMBEDDING_MODELS = {
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically PrithviGeoSpatialMAE is a model that works on images, both in
-    # input and output. I am adding it here because it piggy-backs on embedding
+    # input and output. I am adding it here because it piggybacks on embedding
     # models for the time being.
     "PrithviGeoSpatialMAE": ("prithvi_geospatial_mae", "PrithviGeoSpatialMAE"),
 }
diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py
index 56f0f0984bfa0..2315f9dad5a5a 100644
--- a/vllm/model_executor/sampling_metadata.py
+++ b/vllm/model_executor/sampling_metadata.py
@@ -97,7 +97,7 @@ class SamplingMetadataCache:
 class SamplingMetadata:
     """Metadata for input sequences. Used in sampler.
 
-    The usage is as follow;
+    The usage is as follows;
     ```
     hidden_states = execute_model(...)
     logits = hidden_states[sampling_metadata.selected_token_indices]
diff --git a/vllm/scalar_type.py b/vllm/scalar_type.py
index 6f11ab8e0300a..055f28914ad59 100644
--- a/vllm/scalar_type.py
+++ b/vllm/scalar_type.py
@@ -269,7 +269,7 @@ class ScalarType:
 
     @classmethod
     def uint(cls, size_bits: int, bias: Optional[int]) -> 'ScalarType':
-        """Create a unsigned integer scalar type."""
+        """Create an unsigned integer scalar type."""
         ret = cls(0, size_bits, False, bias if bias else 0)
         ret.id  # noqa B018: make sure the id is cached
         return ret
diff --git a/vllm/sequence.py b/vllm/sequence.py
index 4b8e1f4641f79..24114c0bb792e 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -1193,7 +1193,7 @@ class HiddenStates(msgspec.Struct, array_like=True,
     seq_ids are the sequence ids of each entry of the batch dimension of
     the hidden_states tensor"""
     # Scorer hidden states. For prefill step, it is used for hidden states of
-    # all tokens, whereas for decode step, it use used for last accepted tokens.
+    # all tokens, whereas for decode step, it is used for last accepted tokens.
     hidden_states: torch.Tensor
     # The sequence group metadata list. Only needed for decode step.
     seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index e07d53ff84d37..8322fa7335b69 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -815,7 +815,7 @@ class Scheduler(SchedulerInterface):
         # NOTE: structured_output_request_ids maps
         # a request's (request that uses structured output)
         # request_id to its index in the batch.
-        # This will helps us determine to slice the grammar bitmask
+        # This will help us determine to slice the grammar bitmask
         # and only applies valid mask for requests that
         # uses structured decoding.
         structured_output_request_ids: dict[str, int] = {}
@@ -923,7 +923,7 @@ class Scheduler(SchedulerInterface):
                     request):
                 # NOTE: structured_output_request
                 # should not be None if use_structured_output, we have
-                # check above, so safe to ignore type warning
+                # checked above, so safe to ignore type warning
                 request.structured_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
                     req_id, new_token_ids)
 
@@ -1242,7 +1242,7 @@ class Scheduler(SchedulerInterface):
           finished_sending reqs to the output.
         * if finished_sending: free the blocks
         # if finished_recving: add to state so we can
-          scheduler the request during the next step.
+          schedule the request during the next step.
         """
 
         if self.connector is not None:
diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py
index 9a80460261e02..95094bda65cde 100644
--- a/vllm/v1/metrics/stats.py
+++ b/vllm/v1/metrics/stats.py
@@ -59,7 +59,7 @@ class RequestStateStats:
 
     num_generation_tokens: int = 0
 
-    # This is a engine frontend timestamp (wall-clock)
+    # This is an engine frontend timestamp (wall-clock)
     arrival_time: float = 0.0
 
     # These are engine core timestamps (monotonic)