From 22cf679aadca99311cfb5a9f894039e464e366aa Mon Sep 17 00:00:00 2001
From: Didier Durand <2927957+didier-durand@users.noreply.github.com>
Date: Fri, 22 Aug 2025 19:38:46 +0200
Subject: [PATCH] [Doc]: fix various typos in multiple files (#23179)

Signed-off-by: Didier Durand
---
 vllm/beam_search.py                   | 2 +-
 vllm/compilation/backends.py          | 2 +-
 vllm/engine/arg_utils.py              | 6 +++---
 vllm/engine/multiprocessing/client.py | 4 ++--
 vllm/entrypoints/chat_utils.py        | 2 +-
 vllm/utils/__init__.py                | 4 ++--
 vllm/v1/structured_output/__init__.py | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/vllm/beam_search.py b/vllm/beam_search.py
index f3bc4218323d..5a2e79e1b5c7 100644
--- a/vllm/beam_search.py
+++ b/vllm/beam_search.py
@@ -18,7 +18,7 @@ class BeamSearchSequence:
     The text field is optional and will only be filled when the sequence is
     about to be returned to the user.
     """
-    # The tokens includes the prompt.
+    # The tokens include the prompt.
     tokens: list[int]
     logprobs: list[dict[int, Logprob]]
     lora_request: Optional[LoRARequest] = None
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index 059e7a3b2976..56494dffc96b 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -484,7 +484,7 @@ class VllmBackend:
         factors = []
 
         # 0. factors come from the env, for example, The values of
-        # VLLM_PP_LAYER_PARTITION will affects the computation graph.
+        # VLLM_PP_LAYER_PARTITION will affect the computation graph.
         env_hash = envs.compute_hash()
         factors.append(env_hash)
 
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 4700a93dd6da..965264ee3097 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -605,7 +605,7 @@ class EngineArgs:
             **guided_decoding_kwargs["disable_additional_properties"])
         guided_decoding_group.add_argument(
             "--reasoning-parser",
-            # This choices is a special case because it's not static
+            # This choice is a special case because it's not static
             choices=list(ReasoningParserManager.reasoning_parsers),
             **guided_decoding_kwargs["reasoning_backend"])
 
@@ -1047,7 +1047,7 @@ class EngineArgs:
             # details from the config directly
             # no user input required / expected
             if isinstance(hf_config, SpeculatorsConfig):
-                # We create one since we dont create one
+                # We create one since we don't have one
                 self.speculative_config = {}
                 self.speculative_config[
                     "num_speculative_tokens"] = hf_config.num_lookahead_tokens
@@ -1775,7 +1775,7 @@ class AsyncEngineArgs(EngineArgs):
     def add_cli_args(parser: FlexibleArgumentParser,
                      async_args_only: bool = False) -> FlexibleArgumentParser:
         # Initialize plugin to update the parser, for example, The plugin may
-        # adding a new kind of quantization method to --quantization argument or
+        # add a new kind of quantization method to --quantization argument or
         # a new device to --device argument.
         load_general_plugins()
         if not async_args_only:
diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
index eca29af50055..0bb11328b1db 100644
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -539,7 +539,7 @@ class MQLLMEngineClient(EngineClient):
         if request_id in self.output_queues:
             raise ValueError(f"Request {request_id} already exists")
 
-        # 1) Create output queue for this requests.
+        # 1) Create output queue for this request.
         queue: asyncio.Queue[Union[RequestOutput,
                                    BaseException]] = asyncio.Queue()
         self.output_queues[request_id] = queue
 
@@ -651,7 +651,7 @@ class MQLLMEngineClient(EngineClient):
         # Uses the same I/O as generate requests
         request = RPCLoadAdapterRequest(lora_request)
 
-        # Create output queue for this requests.
+        # Create output queue for this request.
         queue: asyncio.Queue[Union[None, BaseException]] = asyncio.Queue()
         self.output_queues[request.request_id] = queue
 
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 87772a499f42..7b11a50642de 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1330,7 +1330,7 @@ def apply_mistral_chat_template(
     # mistral-common uses assert statements to stop processing of input
     # if input does not comply with the expected format.
     # We convert those assertion errors to ValueErrors so they can be
-    # are properly caught in the preprocessing_input step
+    # properly caught in the preprocessing_input step
     except (AssertionError, MistralCommonException) as e:
         raise ValueError(str(e)) from e
 
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 7079bfb8dbce..7c34a858c0a2 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -2482,7 +2482,7 @@ class PlaceholderModule(_PlaceholderBase):
     A placeholder object to use when a module does not exist.
 
     This enables more informative errors when trying to access attributes
-    of a module that does not exists.
+    of a module that does not exist.
     """
 
     def __init__(self, name: str) -> None:
@@ -3109,7 +3109,7 @@ class LazyLoader(types.ModuleType):
     """
     LazyLoader module borrowed from Tensorflow
     https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
-    with a addition of "module caching".
+    with an addition of "module caching".
 
     Lazily import a module, mainly to avoid pulling in large dependencies.
     Modules such as `xgrammar` might do additional side effects, so we
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
index 63604a335d9f..3bafa61044ab 100644
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -267,7 +267,7 @@ class StructuredOutputManager:
         assert request.structured_output_request is not None
         assert request.structured_output_request.grammar is not None
         # by default, we should always advance
-        # for cases that doesn't uses thinking mode.
+        # for cases that don't use thinking mode.
         if self.reasoner is not None:
             structured_req = request.structured_output_request
 
@@ -276,7 +276,7 @@ class StructuredOutputManager:
 
             # Check if reasoning ends in *this* step
             if self.reasoner.is_reasoning_end(request.all_token_ids):
-                # Reasoning just ended, so we shouldn't advanced til
+                # Reasoning just ended, so we shouldn't advance until
                 # next pass
                 structured_req.reasoning_ended = True