From 1eb2b9c10205b68658dede9dac73390706ef2e05 Mon Sep 17 00:00:00 2001
From: Peter Pan
Date: Wed, 16 Jul 2025 12:12:40 +0800
Subject: [PATCH] [CI] update typos config for CI pre-commit and fix some spells (#20919)

Signed-off-by: Peter Pan
---
 .pre-commit-config.yaml                       |   2 +-
 csrc/cpu/sgl-kernels/common.h                 |   2 +-
 csrc/cpu/sgl-kernels/gemm.h                   |   2 +-
 csrc/cpu/sgl-kernels/gemm_int8.cpp            |   2 +-
 csrc/cpu/sgl-kernels/vec.h                    |   2 +-
 docker/Dockerfile                             |   2 +-
 docs/usage/v1_guide.md                        |   2 +-
 pyproject.toml                                | 183 ++++++++++++++++++
 .../moe/modular_kernel_tools/common.py        |   2 +-
 tests/kernels/moe/test_deepgemm.py            |   2 +-
 tests/models/test_initialization.py           |   2 +-
 tests/v1/test_external_lb_dp.py               |   2 +-
 typos.toml                                    | 179 -----------------
 .../backends/differential_flash_attn.py       |   2 +-
 vllm/entrypoints/openai/serving_responses.py  |   2 +-
 .../layers/fused_moe/fused_moe.py             |   2 +-
 vllm/model_executor/models/phi4flash.py       |   2 +-
 vllm/v1/attention/backends/mla/common.py      |   2 +-
 vllm/v1/worker/tpu_model_runner.py            |   2 +-
 19 files changed, 200 insertions(+), 196 deletions(-)
 delete mode 100644 typos.toml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 24399677c088..5197820fb402 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
   - id: ruff-format
     files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
-  rev: v1.32.0
+  rev: v1.34.0
   hooks:
   - id: typos
 - repo: https://github.com/PyCQA/isort
diff --git a/csrc/cpu/sgl-kernels/common.h b/csrc/cpu/sgl-kernels/common.h
index 20261c1ef3e8..b96037e82c19 100644
--- a/csrc/cpu/sgl-kernels/common.h
+++ b/csrc/cpu/sgl-kernels/common.h
@@ -58,7 +58,7 @@ namespace {
 #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_LAST_DIM_CONTIGUOUS(x) \
-  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimention")
+  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimension")
 
 #define CHECK_INPUT(x) \
   CHECK_CPU(x); \
diff --git a/csrc/cpu/sgl-kernels/gemm.h b/csrc/cpu/sgl-kernels/gemm.h
index afae19721ae9..fba5673323f5 100644
--- a/csrc/cpu/sgl-kernels/gemm.h
+++ b/csrc/cpu/sgl-kernels/gemm.h
@@ -126,7 +126,7 @@ void fused_experts_int4_w4a16_kernel_impl(
     int64_t topk,
     int64_t num_tokens_post_pad);
 
-// shared expert implememntation for int8 w8a8
+// shared expert implementation for int8 w8a8
 template <typename scalar_t>
 void shared_expert_int8_kernel_impl(
     scalar_t* __restrict__ output,
diff --git a/csrc/cpu/sgl-kernels/gemm_int8.cpp b/csrc/cpu/sgl-kernels/gemm_int8.cpp
index 5a0f65a9200d..9a5ca0642e7a 100644
--- a/csrc/cpu/sgl-kernels/gemm_int8.cpp
+++ b/csrc/cpu/sgl-kernels/gemm_int8.cpp
@@ -41,7 +41,7 @@ struct tinygemm_kernel_nn {
   __m512 vd0;
   __m512 vd1[COLS];
 
-  // oops! 4x4 spills but luckly we use 4x2
+  // oops! 4x4 spills but luckily we use 4x2
   __m512 vbias[COLS];
 
   // [NOTE]: s8s8 igemm compensation in avx512-vnni
diff --git a/csrc/cpu/sgl-kernels/vec.h b/csrc/cpu/sgl-kernels/vec.h
index 87955cfb2922..160845c9b1cb 100644
--- a/csrc/cpu/sgl-kernels/vec.h
+++ b/csrc/cpu/sgl-kernels/vec.h
@@ -37,7 +37,7 @@ inline Vectorized convert_from_float_ext(const Vecto
 #define CVT_FP16_TO_FP32(a) \
   _mm512_cvtps_ph(a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))
 
-// this doesn't hanel NaN.
+// this doesn't handle NaN.
 inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {
   const __m512i x = _mm512_cvtepu8_epi16(fp8_vec);
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 78b548df32c1..e0e08510c10c 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -63,7 +63,7 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 ARG PIP_KEYRING_PROVIDER=disabled
 ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
 
-# Flag enables build-in KV-connector dependency libs into docker images
+# Flag enables built-in KV-connector dependency libs into docker images
 ARG INSTALL_KV_CONNECTORS=false
 
 #################### BASE BUILD IMAGE ####################
diff --git a/docs/usage/v1_guide.md b/docs/usage/v1_guide.md
index d7634223542d..12150cf2a82e 100644
--- a/docs/usage/v1_guide.md
+++ b/docs/usage/v1_guide.md
@@ -106,7 +106,7 @@ to enable simultaneous generation and embedding using the same engine instance i
 
 Models using selective state-space mechanisms instead of standard transformer attention are partially supported.
 Models that use Mamba-2 layers (e.g., `Mamba2ForCausalLM`) are supported, but models that use older Mamba-1 layers
-(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet suported. Please note that these models currently require
+(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet supported. Please note that these models currently require
 enforcing eager mode and disabling prefix caching in V1.
 
 Models that combine Mamba-2 layers with standard attention layers are also supported (e.g., `BambaForCausalLM`,
diff --git a/pyproject.toml b/pyproject.toml
index 340abb385657..65ba0b4d833d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -174,3 +174,186 @@ respect-ignore-files = true
 
 [tool.ty.environment]
 python = "./.venv"
+
+[tool.typos.files]
+# these files may be written in non english words
+extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "vllm/third_party/*"]
+ignore-hidden = true
+ignore-files = true
+ignore-dot = true
+ignore-vcs = true
+ignore-global = true
+ignore-parent = true
+
+[tool.typos.default]
+binary = false
+check-filename = false
+check-file = true
+unicode = true
+ignore-hex = true
+identifier-leading-digits = false
+locale = "en"
+extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
+    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
+    ".*[Tt]h[rR].*"]
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.default.extend-identifiers]
+bbc5b7ede = "bbc5b7ede"
+womens_doubles = "womens_doubles"
+v_2nd = "v_2nd"
+# splitted_input = "splitted_input"
+NOOPs = "NOOPs"
+typ = "typ"
+nin_shortcut = "nin_shortcut"
+UperNetDecoder = "UperNetDecoder"
+subtile = "subtile"
+cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
+SFOuput = "SFOuput"
+# huggingface transformers repo uses these words
+depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
+DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
+depthwise_seperable_CNN = "depthwise_seperable_CNN"
+
+[tool.typos.default.extend-words]
+iy = "iy"
+tendencias = "tendencias"
+# intel cpu features
+tme = "tme"
+dout = "dout"
+Pn = "Pn"
+arange = "arange"
+
+[tool.typos.type.py]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.py.extend-identifiers]
+arange = "arange"
+NDArray = "NDArray"
+EOFError = "EOFError"
+fo = "fo"
+ba = "ba"
+
+[tool.typos.type.py.extend-words]
+
+[tool.typos.type.cpp]
+extend-glob = ["*.cu"]
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cpp.extend-identifiers]
+countr_one = "countr_one"
+k_ot = "k_ot"
+ot = "ot"
+
+[tool.typos.type.cpp.extend-words]
+
+[tool.typos.type.rust]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.rust.extend-identifiers]
+flate2 = "flate2"
+
+[tool.typos.type.rust.extend-words]
+ser = "ser"
+
+[tool.typos.type.lock]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.lock.extend-identifiers]
+
+[tool.typos.type.lock.extend-words]
+
+[tool.typos.type.jl]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.jl.extend-identifiers]
+
+[tool.typos.type.jl.extend-words]
+modul = "modul"
+egals = "egals"
+usig = "usig"
+egal = "egal"
+
+[tool.typos.type.go]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.go.extend-identifiers]
+flate = "flate"
+
+[tool.typos.type.go.extend-words]
+
+[tool.typos.type.css]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.css.extend-identifiers]
+nd = "nd"
+
+[tool.typos.type.css.extend-words]
+
+[tool.typos.type.man]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.man.extend-identifiers]
+Nd = "Nd"
+
+[tool.typos.type.man.extend-words]
+
+[tool.typos.type.cert]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cert.extend-identifiers]
+
+[tool.typos.type.cert.extend-words]
+
+[tool.typos.type.sh]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.sh.extend-identifiers]
+ot = "ot"
+
+[tool.typos.type.sh.extend-words]
+
+[tool.typos.type.vimscript]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.vimscript.extend-identifiers]
+windo = "windo"
+
+[tool.typos.type.vimscript.extend-words]
diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py
index a1319ab0509b..fd99e8dc5c98 100644
--- a/tests/kernels/moe/modular_kernel_tools/common.py
+++ b/tests/kernels/moe/modular_kernel_tools/common.py
@@ -416,7 +416,7 @@ class RankTensors:
         # We dequant and use that as hidden_states so the tests are stable.
         # quantizing and dequantizing yield slightly different results
         # depending on the hardware. Here we, quantize and dequantize
-        # first - so further quantize and dequantize will yeild the same
+        # first - so further quantize and dequantize will yield the same
         # values.
         if config.is_per_tensor_act_quant:
             a_q, a_scales = ops.scaled_fp8_quant(
diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py
index 1460fdd3aeaf..f7578e226917 100644
--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -95,7 +95,7 @@ def run_single_case(m, n, k, topk, num_experts, block_size):
     topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
     topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
 
-    # triton referrence
+    # triton reference
     out_triton = fused_experts(
         hidden_states=tokens_bf16,
         w1=w1,
diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py
index ea6a2cc37ccf..2d12327dc2ec 100644
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -43,7 +43,7 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
         text_config = hf_config.get_text_config()
 
         # Ensure at least 2 expert per group
-        # Since `grouped_topk` assums top-2
+        # Since `grouped_topk` assumes top-2
         n_group = getattr(text_config, 'n_group', None)
         num_experts = n_group * 2 if n_group is not None else 2
 
diff --git a/tests/v1/test_external_lb_dp.py b/tests/v1/test_external_lb_dp.py
index 17952dfb0d91..98fefad1ff4a 100644
--- a/tests/v1/test_external_lb_dp.py
+++ b/tests/v1/test_external_lb_dp.py
@@ -17,7 +17,7 @@
 MODEL_NAME = "ibm-research/PowerMoE-3b"
 
 # Number of data parallel ranks for external LB testing
 DP_SIZE = int(os.getenv("DP_SIZE", "2"))
 
-# Default tensor parallell size to use
+# Default tensor parallel size to use
 TP_SIZE = int(os.getenv("TP_SIZE", "1"))
diff --git a/typos.toml b/typos.toml
deleted file mode 100644
index f51ce2f36208..000000000000
--- a/typos.toml
+++ /dev/null
@@ -1,179 +0,0 @@
-[files]
-# these files may be written in non english words
-extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
-    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
-    "vllm/third_party/*"]
-ignore-hidden = true
-ignore-files = true
-ignore-dot = true
-ignore-vcs = true
-ignore-global = true
-ignore-parent = true
-
-[default]
-binary = false
-check-filename = false
-check-file = true
-unicode = true
-ignore-hex = true
-identifier-leading-digits = false
-locale = "en"
-extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
-    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
-    ".*ot.*", ".*[Tt]h[rR].*"]
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[default.extend-identifiers]
-bbc5b7ede = "bbc5b7ede"
-womens_doubles = "womens_doubles"
-v_2nd = "v_2nd"
-splitted_input = "splitted_input"
-NOOPs = "NOOPs"
-typ = "typ"
-nin_shortcut = "nin_shortcut"
-UperNetDecoder = "UperNetDecoder"
-subtile = "subtile"
-cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
-SFOuput = "SFOuput"
-# huggingface transformers repo uses these words
-depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
-DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
-depthwise_seperable_CNN = "depthwise_seperable_CNN"
-
-[default.extend-words]
-iy = "iy"
-tendencias = "tendencias"
-# intel cpu features
-tme = "tme"
-dout = "dout"
-Pn = "Pn"
-arange = "arange"
-
-[type.py]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.py.extend-identifiers]
-arange = "arange"
-NDArray = "NDArray"
-EOFError = "EOFError"
-
-[type.py.extend-words]
-
-[type.cpp]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.cpp.extend-identifiers]
-countr_one = "countr_one"
-
-[type.cpp.extend-words]
-
-[type.rust]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.rust.extend-identifiers]
-flate2 = "flate2"
-
-[type.rust.extend-words]
-ser = "ser"
-
-[type.lock]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.lock.extend-identifiers]
-
-[type.lock.extend-words]
-
-[type.jl]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.jl.extend-identifiers]
-
-[type.jl.extend-words]
-modul = "modul"
-egals = "egals"
-usig = "usig"
-egal = "egal"
-
-[type.go]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.go.extend-identifiers]
-flate = "flate"
-
-[type.go.extend-words]
-
-[type.css]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.css.extend-identifiers]
-nd = "nd"
-
-[type.css.extend-words]
-
-[type.man]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.man.extend-identifiers]
-Nd = "Nd"
-
-[type.man.extend-words]
-
-[type.cert]
-extend-glob = []
-check-file = false
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.cert.extend-identifiers]
-
-[type.cert.extend-words]
-
-[type.sh]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.sh.extend-identifiers]
-stap = "stap"
-ot = "ot"
-
-[type.sh.extend-words]
-
-[type.vimscript]
-extend-glob = []
-extend-ignore-identifiers-re = []
-extend-ignore-words-re = []
-extend-ignore-re = []
-
-[type.vimscript.extend-identifiers]
-windo = "windo"
-
-[type.vimscript.extend-words]
diff --git a/vllm/attention/backends/differential_flash_attn.py b/vllm/attention/backends/differential_flash_attn.py
index 7c35e58967d0..1c139952371a 100644
--- a/vllm/attention/backends/differential_flash_attn.py
+++ b/vllm/attention/backends/differential_flash_attn.py
@@ -961,7 +961,7 @@ class DifferentialFlashAttentionImpl(AttentionImpl):
                                          "... H (two D) -> ... (H two) D",
                                          two=2)
 
-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
             q = q.view(-1, self.num_heads, self.head_size)
             q1, q2 = self.split_heads(q)
             # kv_cache shape is (2, num_blocks, block_size, num_kv_heads, head_size)  # noqa: E501
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index f7bde6e243b7..a359371848ce 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -372,7 +372,7 @@ class OpenAIServingResponses(OpenAIServing):
             })
 
         # Append the new input.
-        # Reponses API supports simple text inputs without chat format.
+        # Responses API supports simple text inputs without chat format.
         if isinstance(request.input, str):
             messages.append({"role": "user", "content": request.input})
         else:
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index f0bffc7dae27..079486dd438a 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1172,7 +1172,7 @@ def fused_experts(
         allow_cutlass_block_scaled_grouped_gemm: bool = False) -> torch.Tensor:
     # For now, disable DeepGemm for small N (<= 512) until better
     # permute/unpermute ops are available.
-    # However, on B200, we use DeepGemm for all cases becuase they only support
+    # However, on B200, we use DeepGemm for all cases because they only support
     # E8M0 scale, which means we requantize the weight and input to the specific
     # scale. Fallen back to cutlass or triton for some cases would cause
     # accuracy issue.
diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py
index 10f8b6552afb..c1dd9fab7faa 100644
--- a/vllm/model_executor/models/phi4flash.py
+++ b/vllm/model_executor/models/phi4flash.py
@@ -193,7 +193,7 @@ class SambaYAttention(nn.Module):
             ],
                           dim=-1)
             attn_output = self.attn(q, k, v)
-        else:  # re-use the kv cache, full attention
+        else:  # reuse the kv cache, full attention
             q = self.Wqkv(hidden_states)
             attn_output = self.attn(q, None, None)
         attn_output = attn_output.view(-1, self.num_heads * self.head_dim)
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index 381a92a83093..173c8466f6d0 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -394,7 +394,7 @@ def use_cudnn_prefill() -> bool:
 
 
 # Currently 394MB, this can be tuned based on GEMM sizes used.
-# Choosen to be the same as sglang:
+# Chosen to be the same as sglang:
 # https://github.com/sgl-project/sglang/blob/766392c6bda2558b61ce6d1c1bfd8081a549e1f1/python/sglang/global_config.py#L37
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 394 * 1024 * 1024
diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py
index 83a80bd865b1..6ac069299357 100644
--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -969,7 +969,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
         else:
             mm_embeds = []
         xm.mark_step()
-        # Prepare inputs, the requests might be splitted into multiple
+        # Prepare inputs, the requests might be split into multiple
         # executions, combine the result of each execution.
         start_index = 0
         combined_selected_tokens: list[torch.Tensor] = []
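
Note on the `[tool.typos.*]` tables this patch adds to pyproject.toml: in a typos config, an entry whose value differs from its key defines a correction the checker reports, while an identity mapping (key equal to value) whitelists the word so it is never flagged, which is why nearly every entry above maps a term to itself. A minimal sketch of the two entry kinds (the `dimention` line is a hypothetical illustration, not part of this patch):

    [tool.typos.default.extend-words]
    # identity mapping: accept "dout" as a valid word, never flag it
    dout = "dout"
    # correction mapping: flag "dimention" and suggest "dimension"
    dimention = "dimension"

With the hook bump in .pre-commit-config.yaml above, the same check that CI runs can be reproduced locally with `pre-commit run typos --all-files`.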