[Doc]: fix typos in Python comments (#24042)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>

parent a344a5aa0a
commit 0235103cbb
@@ -78,7 +78,7 @@ class QuickAllReduce:
         group: the process group to work on. If None, it will use the
             default process group.
         device: the device to bind the CustomAllreduce to. If None,
-            it will be bind to f"cuda:{local_rank}".
+            it will be bound to f"cuda:{local_rank}".
         It is the caller's responsibility to make sure each communicator
         is bind to a unique device, and all communicators in this group
         are in the same node.
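The corrected line documents the default device binding. Below is a minimal sketch of that fallback, assuming only that local_rank is the process's GPU index on the node; the helper name is hypothetical and not part of vLLM.

```python
from typing import Optional, Union

import torch


def resolve_device(device: Optional[Union[int, str, torch.device]],
                   local_rank: int) -> torch.device:
    # Illustrative fallback: when no device is given, bind to the GPU
    # matching the local rank, mirroring the documented default.
    if device is None:
        return torch.device(f"cuda:{local_rank}")
    if isinstance(device, int):
        return torch.device(f"cuda:{device}")
    return torch.device(device)
```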
@@ -186,7 +186,7 @@ class RayPPCommunicator(Communicator):
         """
         Receive a torch.Tensor from a peer and synchronize the current stream.

-        After this call returns, the receive buffer is safe to read from from
+        After this call returns, the receive buffer is safe to read from
         any stream. An RayChannelError will be raised if an error occurred
         (e.g., remote actor died), and the buffer is not safe to read.
@@ -161,7 +161,7 @@ async def write_local_file(output_path: str,
         batch_outputs: The list of batch outputs to write.
     """
     # We should make this async, but as long as run_batch runs as a
-    # standalone program, blocking the event loop won't effect performance.
+    # standalone program, blocking the event loop won't affect performance.
     with open(output_path, "w", encoding="utf-8") as f:
         for o in batch_outputs:
             print(o.model_dump_json(), file=f)
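The comment keeps the write intentionally blocking. For illustration, a minimal sketch of how the same write could be pushed off the event loop with asyncio.to_thread; this is an assumption-level alternative, not the change made in this commit.

```python
import asyncio


async def write_local_file_async(output_path: str, batch_outputs) -> None:
    # Offload the blocking file I/O to a worker thread so the event loop
    # stays responsive; functionally equivalent to the synchronous loop
    # shown in the diff above.
    def _write() -> None:
        with open(output_path, "w", encoding="utf-8") as f:
            for o in batch_outputs:
                print(o.model_dump_json(), file=f)

    await asyncio.to_thread(_write)
```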
@@ -728,7 +728,7 @@ class OpenAIServingResponses(OpenAIServing):
             for response_msg in request.input:
                 messages.append(
                     parse_response_input(response_msg, prev_outputs))
-                # User passes in a a tool call request and its output. We need
+                # User passes in a tool call request and its output. We need
                 # to add the tool call request to prev_outputs so that the
                 # parse_response_input can find the tool call request when
                 # parsing the tool call output.
@@ -223,7 +223,7 @@ def _wait_until_pg_ready(current_placement_group: "PlacementGroup"):
     """
     # Wait until PG is ready - this will block until all
-    # requested resources are available, and will timeout
+    # requested resources are available, and will time out
     # if they cannot be provisioned.
     placement_group_specs = current_placement_group.bundle_specs
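For context on the block-then-time-out behavior the comment describes, here is a minimal sketch of the usual Ray pattern, assuming a standard placement group; it is illustrative and not the body of _wait_until_pg_ready.

```python
import ray
from ray.exceptions import GetTimeoutError
from ray.util.placement_group import placement_group

ray.init()

# Two bundles, each asking for one CPU and one GPU (illustrative sizes).
pg = placement_group([{"CPU": 1, "GPU": 1}] * 2)
try:
    # pg.ready() returns an ObjectRef that resolves once every bundle has
    # been placed; ray.get blocks until then and raises GetTimeoutError if
    # the resources cannot be provisioned within the timeout.
    ray.get(pg.ready(), timeout=30)
except GetTimeoutError:
    print("Placement group could not be provisioned:", pg.bundle_specs)
```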
@@ -116,7 +116,7 @@ def _normalize_quant_group_shape(x: torch.Tensor, group_shape: GroupShape):
 # then we would expand a to:
 # a = [[1, 1, 2, 2],
 #      [3, 3, 4, 4]]
-# NOTE this function this function does not explicitly broadcast dimensions
+# NOTE this function does not explicitly broadcast dimensions
 # with an extent of 1, since this can be done implicitly by pytorch
 def group_broadcast(t, shape):
     for i, s in enumerate(shape):
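The comment's example (expanding [[1, 2], [3, 4]] to [[1, 1, 2, 2], [3, 3, 4, 4]]) can be reproduced with plain PyTorch. Here is a minimal sketch of one way to do that group-wise expansion; the helper below is hypothetical and not the actual group_broadcast body.

```python
import torch


def expand_to_group_shape(t: torch.Tensor, shape: tuple) -> torch.Tensor:
    # Repeat each element along every dimension by the ratio between the
    # target extent and the current extent, e.g. (2, 2) -> (2, 4) repeats
    # each column twice.
    for dim, target in enumerate(shape):
        if t.shape[dim] != target:
            t = t.repeat_interleave(target // t.shape[dim], dim=dim)
    return t


a = torch.tensor([[1, 2], [3, 4]])
print(expand_to_group_shape(a, (2, 4)))
# tensor([[1, 1, 2, 2],
#         [3, 3, 4, 4]])
```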
@@ -185,7 +185,7 @@ _EMBEDDING_MODELS = {
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically PrithviGeoSpatialMAE is a model that works on images, both in
-    # input and output. I am adding it here because it piggy-backs on embedding
+    # input and output. I am adding it here because it piggybacks on embedding
     # models for the time being.
     "PrithviGeoSpatialMAE": ("prithvi_geospatial_mae", "PrithviGeoSpatialMAE"),
 }
@@ -97,7 +97,7 @@ class SamplingMetadataCache:
 class SamplingMetadata:
     """Metadata for input sequences. Used in sampler.

-    The usage is as follow;
+    The usage is as follows;
     ```
     hidden_states = execute_model(...)
     logits = hidden_states[sampling_metadata.selected_token_indices]
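The docstring's snippet gathers hidden states at selected_token_indices before computing logits. Below is a small standalone illustration of that indexing step with hypothetical shapes; it is not the sampler itself.

```python
import torch

# Hypothetical shapes: 6 tokens in the flattened batch, hidden size 4.
hidden_states = torch.randn(6, 4)
# Indices of the tokens whose next-token logits are actually needed,
# e.g. the last token of each sequence in the batch.
selected_token_indices = torch.tensor([2, 5])

# Row-gather: keeps only the positions that feed the sampler.
selected = hidden_states[selected_token_indices]
print(selected.shape)  # torch.Size([2, 4])
```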
@@ -269,7 +269,7 @@ class ScalarType:

     @classmethod
     def uint(cls, size_bits: int, bias: Optional[int]) -> 'ScalarType':
-        """Create a unsigned integer scalar type."""
+        """Create an unsigned integer scalar type."""
         ret = cls(0, size_bits, False, bias if bias else 0)
         ret.id  # noqa B018: make sure the id is cached
         return ret
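A brief usage sketch of the classmethod whose docstring was fixed. The import path and the attribute access are assumptions for illustration; the sizes and bias are arbitrary example values.

```python
# Assumed import path for illustration.
from vllm.scalar_type import ScalarType

# Plain unsigned 8-bit integer type (no bias).
u8 = ScalarType.uint(8, None)

# Unsigned 4-bit integer stored with a bias of 8, so stored values 0..15
# represent -8..7 after the bias is subtracted.
u4b8 = ScalarType.uint(4, 8)
```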
@@ -1193,7 +1193,7 @@ class HiddenStates(msgspec.Struct, array_like=True,
     seq_ids are the sequence ids of each entry of the batch
     dimension of the hidden_states tensor"""
     # Scorer hidden states. For prefill step, it is used for hidden states of
-    # all tokens, whereas for decode step, it use used for last accepted tokens.
+    # all tokens, whereas for decode step, it is used for last accepted tokens.
     hidden_states: torch.Tensor
     # The sequence group metadata list. Only needed for decode step.
     seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None
@@ -815,7 +815,7 @@ class Scheduler(SchedulerInterface):
         # NOTE: structured_output_request_ids maps
         # a request's (request that uses structured output)
         # request_id to its index in the batch.
-        # This will helps us determine to slice the grammar bitmask
+        # This will help us determine to slice the grammar bitmask
         # and only applies valid mask for requests that
         # uses structured decoding.
         structured_output_request_ids: dict[str, int] = {}
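For intuition on the request_id-to-batch-index map the comment describes, here is a small standalone sketch of applying a grammar bitmask only to the requests that use structured decoding; the names, shapes, and values are hypothetical and this is not the scheduler's actual code.

```python
import torch

# Hypothetical batch of four requests; the requests at batch indices 0 and
# 2 use structured output. The dict mirrors request_id -> batch index.
structured_output_request_ids = {"req-a": 0, "req-c": 2}

vocab_size = 8
logits = torch.randn(4, vocab_size)

# One bitmask row per structured-output request, in the same order as the
# dict above; True means the token is allowed by the grammar.
grammar_bitmask = torch.tensor([
    [1, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 0, 0],
], dtype=torch.bool)

# Apply each mask row only to the logits of the request it belongs to.
for row, batch_index in enumerate(structured_output_request_ids.values()):
    logits[batch_index][~grammar_bitmask[row]] = float("-inf")
```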
@@ -923,7 +923,7 @@ class Scheduler(SchedulerInterface):
                     request):
                 # NOTE: structured_output_request
                 # should not be None if use_structured_output, we have
-                # check above, so safe to ignore type warning
+                # checked above, so safe to ignore type warning
                 request.structured_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
                     req_id, new_token_ids)
@@ -1242,7 +1242,7 @@ class Scheduler(SchedulerInterface):
          finished_sending reqs to the output.
        * if finished_sending: free the blocks
        # if finished_recving: add to state so we can
-          scheduler the request during the next step.
+          schedule the request during the next step.
        """

        if self.connector is not None:
@@ -59,7 +59,7 @@ class RequestStateStats:

     num_generation_tokens: int = 0

-    # This is a engine frontend timestamp (wall-clock)
+    # This is an engine frontend timestamp (wall-clock)
     arrival_time: float = 0.0

     # These are engine core timestamps (monotonic)
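The corrected comment distinguishes a wall-clock frontend timestamp from monotonic engine-core timestamps. A minimal sketch of why that split matters: durations should come from the monotonic clock, which never jumps backwards. The variable names below are illustrative, not the actual stats fields.

```python
import time

# Wall-clock timestamp, suitable for reporting when a request arrived
# (it can jump if the system clock is adjusted).
arrival_time = time.time()

# Monotonic timestamps, suitable for measuring durations inside the engine.
queued_ts = time.monotonic()
# ... request waits in the queue ...
scheduled_ts = time.monotonic()

queue_wait_s = scheduled_ts - queued_ts
print(f"arrived at {arrival_time:.0f} (epoch s), waited {queue_wait_s:.6f} s")
```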