From 3bdd426636cec97d4cd5cff0e1a057b45429e07c Mon Sep 17 00:00:00 2001
From: Wilson Wu
Date: Wed, 10 Dec 2025 12:05:28 +0800
Subject: [PATCH] Fix typos in comments across multiple files (#30345)

Signed-off-by: Wilson Wu
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
---
 csrc/cpu/cpu_attn_impl.hpp                     | 2 +-
 csrc/quantization/machete/machete_mainloop.cuh | 2 +-
 docs/features/nixl_connector_usage.md          | 2 +-
 vllm/model_executor/layers/fused_moe/layer.py  | 2 +-
 .../schemes/compressed_tensors_w4a16_nvfp4.py  | 2 +-
 vllm/v1/worker/gpu_model_runner.py             | 2 +-
 vllm/v1/worker/utils.py                        | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/csrc/cpu/cpu_attn_impl.hpp b/csrc/cpu/cpu_attn_impl.hpp
index 02164ed3666e3..e3e077b845f4f 100644
--- a/csrc/cpu/cpu_attn_impl.hpp
+++ b/csrc/cpu/cpu_attn_impl.hpp
@@ -186,7 +186,7 @@ struct AttentionMetadata {
 // - Intermediate outputs: q_tile_size * head_dim * output_buffer_elem_size + 2
 // * q_tile_size * 4, partial output, max + sum (float)
 // Reduction scratchpad contains:
-// - flags: bool array to indicate wether the split is finished
+// - flags: bool array to indicate whether the split is finished
 // - outputs: split_num * q_tile_size * head_dim * output_buffer_elem_size
 // - max, sum: 2 * split_num * q_tile_size * 4
 class AttentionScratchPad {
diff --git a/csrc/quantization/machete/machete_mainloop.cuh b/csrc/quantization/machete/machete_mainloop.cuh
index 2f52a6b7a0246..9f02f4f179741 100644
--- a/csrc/quantization/machete/machete_mainloop.cuh
+++ b/csrc/quantization/machete/machete_mainloop.cuh
@@ -617,7 +617,7 @@ struct MacheteCollectiveMma {
 
   // Same as upstream, should be kept the same when possible, not formatted for
   // easier comparison
-  // with `SwapAB ? N : M -> M` since we dont support SwapAB
+  // with `SwapAB ? N : M -> M` since we don't support SwapAB
   // clang-format off
   template
   static bool
diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md
index 84c8f9e77d6d3..601205e1ed0b1 100644
--- a/docs/features/nixl_connector_usage.md
+++ b/docs/features/nixl_connector_usage.md
@@ -22,7 +22,7 @@ python tools/install_nixl_from_source_ubuntu.py
 NixlConnector uses NIXL library for underlying communication, which supports multiple transport backends. UCX (Unified Communication X) is the primary default transport library used by NIXL. Configure transport environment variables:
 
 ```bash
-# Example UCX configuration, adjust according to your enviroment
+# Example UCX configuration, adjust according to your environment
 export UCX_TLS=all          # or specify specific transports like "rc,ud,sm,^cuda_ipc" ..etc
 export UCX_NET_DEVICES=all  # or specify network devices like "mlx5_0:1,mlx5_1:1"
 ```
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index e635382068a63..61dd1892d67ea 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -881,7 +881,7 @@ class FusedMoE(CustomOp):
             # Record that the clone will be used by shared_experts_stream
             # to avoid gc issue from deallocation of hidden_states_clone
             # For more details: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
-            # NOTE: We dont need shared_output.record_stream(current_stream())
+            # NOTE: We don't need shared_output.record_stream(current_stream())
             # because we synch the streams before using shared_output.
             hidden_states_clone.record_stream(self.shared_experts_stream)
 
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py
index 3afadc6eb7e5b..d2701a464f129 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py
@@ -28,7 +28,7 @@ class CompressedTensorsW4A16Fp4(CompressedTensorsScheme):
 
     @classmethod
     def get_min_capability(cls) -> int:
-        # dont restrict as emulations
+        # don't restrict as emulations
        return 80
 
     def create_weights(
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index f6f89d6eb6736..39456d2e80ed0 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4871,7 +4871,7 @@ class GPUModelRunner(
         # we need to adjust the cudagraph sizes to be a multiple of the uniform
         # decode query length to avoid: https://github.com/vllm-project/vllm/issues/28207
         # temp-fix: https://github.com/vllm-project/vllm/issues/28207#issuecomment-3504004536
-        # Will be removed in the near future when we have seperate cudagraph capture
+        # Will be removed in the near future when we have separate cudagraph capture
         # sizes for decode and mixed prefill-decode.
         if (
             cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py
index 0b0e2006d73d2..4dd9463ee6285 100644
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -135,7 +135,7 @@ class AttentionGroup:
     kv_cache_spec: KVCacheSpec
     kv_cache_group_id: int
     # When ubatching is enabled we will have a metadata builder for each ubatch
-    # so that if they use internal persistant buffers for cudagraphs, and they
+    # so that if they use internal persistent buffers for cudagraphs, and they
     # won't have to worry about conflicting with the other ubatches.
     metadata_builders: list[AttentionMetadataBuilder] = field(
         default_factory=lambda: []
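
The fused_moe hunk above touches a comment about PyTorch's cross-stream memory-safety pattern (`Tensor.record_stream`). The sketch below is illustrative only, not part of the patch and not vLLM code; `side_stream`, `x`, and `y` are hypothetical stand-ins for `shared_experts_stream`, `hidden_states_clone`, and `shared_output`.

```python
# Minimal sketch (assumed example) of the record_stream pattern referenced in
# the fused_moe comment: a tensor consumed on a side stream must be registered
# with that stream so the CUDA caching allocator does not recycle its memory
# while the side stream is still reading it.
import torch

if torch.cuda.is_available():
    side_stream = torch.cuda.Stream()     # stand-in for shared_experts_stream
    x = torch.randn(1024, device="cuda")  # allocated on the current stream

    # Make the side stream wait for the work that produced x, then use x there.
    side_stream.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(side_stream):
        y = x * 2.0

    # Without this call, freeing x on the current stream could let the allocator
    # hand its memory to a new tensor before the side stream finishes with it.
    x.record_stream(side_stream)

    # Synchronizing before consuming y is why no y.record_stream(...) is needed,
    # mirroring the "we synch the streams before using shared_output" note above.
    torch.cuda.current_stream().wait_stream(side_stream)
    print(y.sum().item())
```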