Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-09 21:55:50 +08:00

[CI] update typos config for CI pre-commit and fix some spells (#20919)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>

parent 6ebf313790
commit 1eb2b9c102

The diff bumps the crate-ci/typos pre-commit hook to v1.34.0, moves the typos configuration out of the standalone typos.toml into [tool.typos] tables in pyproject.toml, and fixes assorted misspellings in code comments, docs, and the Dockerfile.
@@ -21,7 +21,7 @@ repos:
   - id: ruff-format
     files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
-  rev: v1.32.0
+  rev: v1.34.0
   hooks:
   - id: typos
 - repo: https://github.com/PyCQA/isort
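To exercise the bumped hook locally, the sketch below (assuming pre-commit is installed) runs the same typos entry that CI runs:

```python
import subprocess

# Run only the typos hook against the whole tree, exactly as CI does.
subprocess.run(["pre-commit", "run", "typos", "--all-files"], check=True)
```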
@@ -58,7 +58,7 @@ namespace {

 #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_LAST_DIM_CONTIGUOUS(x) \
-  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimention")
+  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimension")

 #define CHECK_INPUT(x) \
   CHECK_CPU(x); \
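Not part of the diff, but for readers who want the semantics of CHECK_LAST_DIM_CONTIGUOUS outside C++, a minimal PyTorch sketch of the same predicate:

```python
import torch

def check_last_dim_contiguous(x: torch.Tensor) -> None:
    # Mirrors the macro: the innermost stride must be 1, i.e. elements
    # along the last dimension sit next to each other in memory.
    assert x.stride(-1) == 1, "must be contiguous at last dimension"

check_last_dim_contiguous(torch.randn(4, 8))  # passes
# A transposed view has stride(-1) == 8, so this would raise:
# check_last_dim_contiguous(torch.randn(4, 8).t())
```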
@@ -126,7 +126,7 @@ void fused_experts_int4_w4a16_kernel_impl(
     int64_t topk,
     int64_t num_tokens_post_pad);

-// shared expert implememntation for int8 w8a8
+// shared expert implementation for int8 w8a8
 template <typename scalar_t>
 void shared_expert_int8_kernel_impl(
     scalar_t* __restrict__ output,
@@ -41,7 +41,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
   __m512 vd0;
   __m512 vd1[COLS];

-  // oops! 4x4 spills but luckly we use 4x2
+  // oops! 4x4 spills but luckily we use 4x2
   __m512 vbias[COLS];

   // [NOTE]: s8s8 igemm compensation in avx512-vnni
@@ -37,7 +37,7 @@ inline Vectorized<at::BFloat16> convert_from_float_ext<at::BFloat16>(const Vecto
 #define CVT_FP16_TO_FP32(a) \
   _mm512_cvtps_ph(a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))

-// this doesn't hanel NaN.
+// this doesn't handle NaN.
 inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {
   const __m512i x = _mm512_cvtepu8_epi16(fp8_vec);

@@ -63,7 +63,7 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 ARG PIP_KEYRING_PROVIDER=disabled
 ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}

-# Flag enables build-in KV-connector dependency libs into docker images
+# Flag enables built-in KV-connector dependency libs into docker images
 ARG INSTALL_KV_CONNECTORS=false

 #################### BASE BUILD IMAGE ####################
@@ -106,7 +106,7 @@ to enable simultaneous generation and embedding using the same engine instance i

 Models using selective state-space mechanisms instead of standard transformer attention are partially supported.
 Models that use Mamba-2 layers (e.g., `Mamba2ForCausalLM`) are supported, but models that use older Mamba-1 layers
-(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet suported. Please note that these models currently require
+(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet supported. Please note that these models currently require
 enforcing eager mode and disabling prefix caching in V1.

 Models that combine Mamba-2 layers with standard attention layers are also supported (e.g., `BambaForCausalLM`,
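As a concrete illustration of the requirement stated in that doc paragraph, a minimal sketch of running a Mamba-2 model under these constraints (the checkpoint name is just one example of a `Mamba2ForCausalLM` model):

```python
from vllm import LLM

# Mamba-2 based models currently require eager mode and disabled prefix
# caching in V1, per the note above.
llm = LLM(
    model="mistralai/Mamba-Codestral-7B-v0.1",
    enforce_eager=True,
    enable_prefix_caching=False,
)
print(llm.generate("Hello, my name is")[0].outputs[0].text)
```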
pyproject.toml (+183 lines)

@@ -174,3 +174,186 @@ respect-ignore-files = true

 [tool.ty.environment]
 python = "./.venv"
+
+[tool.typos.files]
+# these files may be written in non english words
+extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "vllm/third_party/*"]
+ignore-hidden = true
+ignore-files = true
+ignore-dot = true
+ignore-vcs = true
+ignore-global = true
+ignore-parent = true
+
+[tool.typos.default]
+binary = false
+check-filename = false
+check-file = true
+unicode = true
+ignore-hex = true
+identifier-leading-digits = false
+locale = "en"
+extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
+    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
+    ".*[Tt]h[rR].*"]
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.default.extend-identifiers]
+bbc5b7ede = "bbc5b7ede"
+womens_doubles = "womens_doubles"
+v_2nd = "v_2nd"
+# splitted_input = "splitted_input"
+NOOPs = "NOOPs"
+typ = "typ"
+nin_shortcut = "nin_shortcut"
+UperNetDecoder = "UperNetDecoder"
+subtile = "subtile"
+cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
+SFOuput = "SFOuput"
+# huggingface transformers repo uses these words
+depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
+DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
+depthwise_seperable_CNN = "depthwise_seperable_CNN"
+
+[tool.typos.default.extend-words]
+iy = "iy"
+tendencias = "tendencias"
+# intel cpu features
+tme = "tme"
+dout = "dout"
+Pn = "Pn"
+arange = "arange"
+
+[tool.typos.type.py]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.py.extend-identifiers]
+arange = "arange"
+NDArray = "NDArray"
+EOFError = "EOFError"
+fo = "fo"
+ba = "ba"
+
+[tool.typos.type.py.extend-words]
+
+[tool.typos.type.cpp]
+extend-glob = ["*.cu"]
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cpp.extend-identifiers]
+countr_one = "countr_one"
+k_ot = "k_ot"
+ot = "ot"
+
+[tool.typos.type.cpp.extend-words]
+
+[tool.typos.type.rust]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.rust.extend-identifiers]
+flate2 = "flate2"
+
+[tool.typos.type.rust.extend-words]
+ser = "ser"
+
+[tool.typos.type.lock]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.lock.extend-identifiers]
+
+[tool.typos.type.lock.extend-words]
+
+[tool.typos.type.jl]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.jl.extend-identifiers]
+
+[tool.typos.type.jl.extend-words]
+modul = "modul"
+egals = "egals"
+usig = "usig"
+egal = "egal"
+
+[tool.typos.type.go]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.go.extend-identifiers]
+flate = "flate"
+
+[tool.typos.type.go.extend-words]
+
+[tool.typos.type.css]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.css.extend-identifiers]
+nd = "nd"
+
+[tool.typos.type.css.extend-words]
+
+[tool.typos.type.man]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.man.extend-identifiers]
+Nd = "Nd"
+
+[tool.typos.type.man.extend-words]
+
+[tool.typos.type.cert]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cert.extend-identifiers]
+
+[tool.typos.type.cert.extend-words]
+
+[tool.typos.type.sh]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.sh.extend-identifiers]
+ot = "ot"
+
+[tool.typos.type.sh.extend-words]
+
+[tool.typos.type.vimscript]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.vimscript.extend-identifiers]
+windo = "windo"
+
+[tool.typos.type.vimscript.extend-words]
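With the configuration now consolidated under [tool.typos], the checker can also be run directly. A small sketch, assuming the typos CLI is installed (v1.34.0 and other recent releases read [tool.typos] from pyproject.toml automatically):

```python
import subprocess

# From the repository root; typos discovers the [tool.typos] tables in
# pyproject.toml, so the old standalone typos.toml is no longer needed.
subprocess.run(["typos"], check=True)
```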
@@ -416,7 +416,7 @@ class RankTensors:
         # We dequant and use that as hidden_states so the tests are stable.
         # quantizing and dequantizing yield slightly different results
         # depending on the hardware. Here we, quantize and dequantize
-        # first - so further quantize and dequantize will yeild the same
+        # first - so further quantize and dequantize will yield the same
         # values.
         if config.is_per_tensor_act_quant:
             a_q, a_scales = ops.scaled_fp8_quant(
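The reasoning in that comment can be shown standalone. A minimal plain-PyTorch sketch of per-tensor FP8 round-tripping (illustrative only, not vLLM's ops.scaled_fp8_quant; 448 is the float8_e4m3fn finite max):

```python
import torch

def fp8_round_trip(x: torch.Tensor) -> torch.Tensor:
    # Per-tensor scale so the largest magnitude maps to the e4m3 max (448).
    scale = x.abs().max() / 448.0
    q = (x / scale).clamp(-448.0, 448.0).to(torch.float8_e4m3fn)  # quantize
    return q.to(torch.float32) * scale                            # dequantize

x = torch.randn(4, 8)
once = fp8_round_trip(x)
twice = fp8_round_trip(once)
# Round-tripping an already round-tripped tensor reproduces it exactly,
# which is why quantizing and dequantizing first makes the tests stable.
print(torch.equal(once, twice))  # True
```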
@@ -95,7 +95,7 @@ def run_single_case(m, n, k, topk, num_experts, block_size):
     topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
     topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)

-    # triton referrence
+    # triton reference
     out_triton = fused_experts(
         hidden_states=tokens_bf16,
         w1=w1,
@@ -43,7 +43,7 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
     text_config = hf_config.get_text_config()

     # Ensure at least 2 expert per group
-    # Since `grouped_topk` assums top-2
+    # Since `grouped_topk` assumes top-2
     n_group = getattr(text_config, 'n_group', None)
     num_experts = n_group * 2 if n_group is not None else 2

@@ -17,7 +17,7 @@ MODEL_NAME = "ibm-research/PowerMoE-3b"

 # Number of data parallel ranks for external LB testing
 DP_SIZE = int(os.getenv("DP_SIZE", "2"))
-# Default tensor parallell size to use
+# Default tensor parallel size to use
 TP_SIZE = int(os.getenv("TP_SIZE", "1"))


typos.toml (179 deletions; file removed)

@@ -1,179 +0,0 @@
-[files]
-# these files may be written in non english words
-extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
-    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
-    "vllm/third_party/*"]
-ignore-hidden = true
-ignore-files = true
-ignore-dot = true
-ignore-vcs = true
-ignore-global = true
-ignore-parent = true
-
-[default]
-binary = false
-check-filename = false
-check-file = true
-unicode = true
-ignore-hex = true
-identifier-leading-digits = false
-locale = "en"
-extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
-    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
-    ".*ot.*", ".*[Tt]h[rR].*"]
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[default.extend-identifiers]
-bbc5b7ede = "bbc5b7ede"
-womens_doubles = "womens_doubles"
-v_2nd = "v_2nd"
-splitted_input = "splitted_input"
-NOOPs = "NOOPs"
-typ = "typ"
-nin_shortcut = "nin_shortcut"
-UperNetDecoder = "UperNetDecoder"
-subtile = "subtile"
-cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
-SFOuput = "SFOuput"
-# huggingface transformers repo uses these words
-depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
-DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
-depthwise_seperable_CNN = "depthwise_seperable_CNN"
-
-[default.extend-words]
-iy = "iy"
-tendencias = "tendencias"
-# intel cpu features
-tme = "tme"
-dout = "dout"
-Pn = "Pn"
-arange = "arange"
-
-[type.py]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.py.extend-identifiers]
-arange = "arange"
-NDArray = "NDArray"
-EOFError = "EOFError"
-
-[type.py.extend-words]
-
-[type.cpp]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.cpp.extend-identifiers]
-countr_one = "countr_one"
-
-[type.cpp.extend-words]
-
-[type.rust]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.rust.extend-identifiers]
-flate2 = "flate2"
-
-[type.rust.extend-words]
-ser = "ser"
-
-[type.lock]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.lock.extend-identifiers]
-
-[type.lock.extend-words]
-
-[type.jl]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.jl.extend-identifiers]
-
-[type.jl.extend-words]
-modul = "modul"
-egals = "egals"
-usig = "usig"
-egal = "egal"
-
-[type.go]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.go.extend-identifiers]
-flate = "flate"
-
-[type.go.extend-words]
-
-[type.css]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.css.extend-identifiers]
-nd = "nd"
-
-[type.css.extend-words]
-
-[type.man]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.man.extend-identifiers]
-Nd = "Nd"
-
-[type.man.extend-words]
-
-[type.cert]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.cert.extend-identifiers]
-
-[type.cert.extend-words]
-
-[type.sh]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.sh.extend-identifiers]
-stap = "stap"
-ot = "ot"
-
-[type.sh.extend-words]
-
-[type.vimscript]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.vimscript.extend-identifiers]
-windo = "windo"
-
-[type.vimscript.extend-words]
@@ -961,7 +961,7 @@ class DifferentialFlashAttentionImpl(AttentionImpl):
                                    "... H (two D) -> ... (H two) D",
                                    two=2)

-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
             q = q.view(-1, self.num_heads, self.head_size)
             q1, q2 = self.split_heads(q)
             # kv_cache shape is (2, num_blocks, block_size, num_kv_heads, head_size) # noqa: E501
@@ -372,7 +372,7 @@ class OpenAIServingResponses(OpenAIServing):
             })

         # Append the new input.
-        # Reponses API supports simple text inputs without chat format.
+        # Responses API supports simple text inputs without chat format.
         if isinstance(request.input, str):
             messages.append({"role": "user", "content": request.input})
         else:
@@ -1172,7 +1172,7 @@ def fused_experts(
         allow_cutlass_block_scaled_grouped_gemm: bool = False) -> torch.Tensor:
     # For now, disable DeepGemm for small N (<= 512) until better
     # permute/unpermute ops are available.
-    # However, on B200, we use DeepGemm for all cases becuase they only support
+    # However, on B200, we use DeepGemm for all cases because they only support
     # E8M0 scale, which means we requantize the weight and input to the specific
     # scale. Fallen back to cutlass or triton for some cases would cause
     # accuracy issue.
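Condensed into code, the dispatch policy that comment describes would look roughly like the following. This is a hypothetical sketch; the helper name, the is_b200 flag, and the threshold wiring are invented for illustration and are not vLLM's actual implementation:

```python
def should_use_deep_gemm(n: int, is_b200: bool) -> bool:
    # B200 always uses DeepGemm: it only supports E8M0 scales, so weights and
    # inputs are requantized to that scale, and falling back to cutlass or
    # triton for some shapes would introduce accuracy mismatches.
    if is_b200:
        return True
    # Elsewhere, skip DeepGemm for small N (<= 512) until better
    # permute/unpermute ops are available.
    return n > 512
```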
@@ -193,7 +193,7 @@ class SambaYAttention(nn.Module):
                 ],
                 dim=-1)
             attn_output = self.attn(q, k, v)
-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
             q = self.Wqkv(hidden_states)
             attn_output = self.attn(q, None, None)
             attn_output = attn_output.view(-1, self.num_heads * self.head_dim)
@@ -394,7 +394,7 @@ def use_cudnn_prefill() -> bool:


 # Currently 394MB, this can be tuned based on GEMM sizes used.
-# Choosen to be the same as sglang:
+# Chosen to be the same as sglang:
 # https://github.com/sgl-project/sglang/blob/766392c6bda2558b61ce6d1c1bfd8081a549e1f1/python/sglang/global_config.py#L37
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 394 * 1024 * 1024

@@ -969,7 +969,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
         else:
             mm_embeds = []
         xm.mark_step()
-        # Prepare inputs, the requests might be splitted into multiple
+        # Prepare inputs, the requests might be split into multiple
         # executions, combine the result of each execution.
         start_index = 0
         combined_selected_tokens: list[torch.Tensor] = []