[CI] update typos config for CI pre-commit and fix some spells (#20919)

Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
Peter Pan 2025-07-16 12:12:40 +08:00 committed by GitHub
parent 6ebf313790
commit 1eb2b9c102
19 changed files with 200 additions and 196 deletions


@@ -21,7 +21,7 @@ repos:
   - id: ruff-format
     files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
-  rev: v1.32.0
+  rev: v1.34.0
   hooks:
   - id: typos
 - repo: https://github.com/PyCQA/isort
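
For reviewers who want to exercise the bumped hook locally, a minimal sketch follows (an illustration, not part of the commit). It assumes the `pre-commit` package is installed and that it is run from the repository root; the hook id `typos` comes from the config above.

# Minimal sketch: run only the `typos` pre-commit hook across the whole tree.
# Assumes `pre-commit` is installed in the active environment.
import subprocess
import sys

result = subprocess.run(
    ["pre-commit", "run", "typos", "--all-files"],
    check=False,  # a non-zero exit code just means the hook reported findings
)
sys.exit(result.returncode)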


@@ -58,7 +58,7 @@ namespace {
 #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_LAST_DIM_CONTIGUOUS(x) \
-  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimention")
+  TORCH_CHECK(x.strides()[x.strides().size() - 1] == 1, #x "must be contiguous at last dimension")
 #define CHECK_INPUT(x) \
   CHECK_CPU(x); \


@@ -126,7 +126,7 @@ void fused_experts_int4_w4a16_kernel_impl(
     int64_t topk,
     int64_t num_tokens_post_pad);
-// shared expert implememntation for int8 w8a8
+// shared expert implementation for int8 w8a8
 template <typename scalar_t>
 void shared_expert_int8_kernel_impl(
     scalar_t* __restrict__ output,


@@ -41,7 +41,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
   __m512 vd0;
   __m512 vd1[COLS];
-  // oops! 4x4 spills but luckly we use 4x2
+  // oops! 4x4 spills but luckily we use 4x2
   __m512 vbias[COLS];
   // [NOTE]: s8s8 igemm compensation in avx512-vnni


@@ -37,7 +37,7 @@ inline Vectorized<at::BFloat16> convert_from_float_ext<at::BFloat16>(const Vecto
 #define CVT_FP16_TO_FP32(a) \
   _mm512_cvtps_ph(a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))
-// this doesn't hanel NaN.
+// this doesn't handle NaN.
 inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {
   const __m512i x = _mm512_cvtepu8_epi16(fp8_vec);


@@ -63,7 +63,7 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 ARG PIP_KEYRING_PROVIDER=disabled
 ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
-# Flag enables build-in KV-connector dependency libs into docker images
+# Flag enables built-in KV-connector dependency libs into docker images
 ARG INSTALL_KV_CONNECTORS=false
 #################### BASE BUILD IMAGE ####################


@@ -106,7 +106,7 @@ to enable simultaneous generation and embedding using the same engine instance i
 Models using selective state-space mechanisms instead of standard transformer attention are partially supported.
 Models that use Mamba-2 layers (e.g., `Mamba2ForCausalLM`) are supported, but models that use older Mamba-1 layers
-(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet suported. Please note that these models currently require
+(e.g., `MambaForCausalLM`, `JambaForCausalLM`) are not yet supported. Please note that these models currently require
 enforcing eager mode and disabling prefix caching in V1.
 Models that combine Mamba-2 layers with standard attention layers are also supported (e.g., `BambaForCausalLM`,


@@ -174,3 +174,186 @@ respect-ignore-files = true
 [tool.ty.environment]
 python = "./.venv"
[tool.typos.files]
# these files may be written in non english words
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
"vllm/third_party/*"]
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[tool.typos.default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.default.extend-identifiers]
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
# splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[tool.typos.default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
[tool.typos.type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
fo = "fo"
ba = "ba"
[tool.typos.type.py.extend-words]
[tool.typos.type.cpp]
extend-glob = ["*.cu"]
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cpp.extend-identifiers]
countr_one = "countr_one"
k_ot = "k_ot"
ot = "ot"
[tool.typos.type.cpp.extend-words]
[tool.typos.type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.rust.extend-identifiers]
flate2 = "flate2"
[tool.typos.type.rust.extend-words]
ser = "ser"
[tool.typos.type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.lock.extend-identifiers]
[tool.typos.type.lock.extend-words]
[tool.typos.type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.jl.extend-identifiers]
[tool.typos.type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[tool.typos.type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.go.extend-identifiers]
flate = "flate"
[tool.typos.type.go.extend-words]
[tool.typos.type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.css.extend-identifiers]
nd = "nd"
[tool.typos.type.css.extend-words]
[tool.typos.type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.man.extend-identifiers]
Nd = "Nd"
[tool.typos.type.man.extend-words]
[tool.typos.type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cert.extend-identifiers]
[tool.typos.type.cert.extend-words]
[tool.typos.type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.sh.extend-identifiers]
ot = "ot"
[tool.typos.type.sh.extend-words]
[tool.typos.type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.vimscript.extend-identifiers]
windo = "windo"
[tool.typos.type.vimscript.extend-words]
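
As a quick sanity check that the relocated configuration loads, one possible sketch (not part of the commit; assumes Python 3.11+ for the standard-library tomllib module and that pyproject.toml sits in the working directory):

# Minimal sketch: confirm the new [tool.typos.*] tables parse from pyproject.toml.
import tomllib

with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

typos_cfg = pyproject["tool"]["typos"]
print(sorted(typos_cfg))                      # expect keys such as 'default', 'files', 'type'
print(typos_cfg["files"]["extend-exclude"])   # the excluded fixture/prompt paths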


@@ -416,7 +416,7 @@ class RankTensors:
         # We dequant and use that as hidden_states so the tests are stable.
         # quantizing and dequantizing yield slightly different results
         # depending on the hardware. Here we, quantize and dequantize
-        # first - so further quantize and dequantize will yeild the same
+        # first - so further quantize and dequantize will yield the same
         # values.
         if config.is_per_tensor_act_quant:
             a_q, a_scales = ops.scaled_fp8_quant(


@@ -95,7 +95,7 @@ def run_single_case(m, n, k, topk, num_experts, block_size):
     topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
     topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
-    # triton referrence
+    # triton reference
     out_triton = fused_experts(
         hidden_states=tokens_bf16,
         w1=w1,


@@ -43,7 +43,7 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
     text_config = hf_config.get_text_config()
     # Ensure at least 2 expert per group
-    # Since `grouped_topk` assums top-2
+    # Since `grouped_topk` assumes top-2
     n_group = getattr(text_config, 'n_group', None)
     num_experts = n_group * 2 if n_group is not None else 2


@@ -17,7 +17,7 @@ MODEL_NAME = "ibm-research/PowerMoE-3b"
 # Number of data parallel ranks for external LB testing
 DP_SIZE = int(os.getenv("DP_SIZE", "2"))
-# Default tensor parallell size to use
+# Default tensor parallel size to use
 TP_SIZE = int(os.getenv("TP_SIZE", "1"))


@@ -1,179 +0,0 @@
[files]
# these files may be written in non english words
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
"vllm/third_party/*"]
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[default.extend-identifiers]
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
[type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
[type.py.extend-words]
[type.cpp]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cpp.extend-identifiers]
countr_one = "countr_one"
[type.cpp.extend-words]
[type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.rust.extend-identifiers]
flate2 = "flate2"
[type.rust.extend-words]
ser = "ser"
[type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.lock.extend-identifiers]
[type.lock.extend-words]
[type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.jl.extend-identifiers]
[type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.go.extend-identifiers]
flate = "flate"
[type.go.extend-words]
[type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.css.extend-identifiers]
nd = "nd"
[type.css.extend-words]
[type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.man.extend-identifiers]
Nd = "Nd"
[type.man.extend-words]
[type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cert.extend-identifiers]
[type.cert.extend-words]
[type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.sh.extend-identifiers]
stap = "stap"
ot = "ot"
[type.sh.extend-words]
[type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.vimscript.extend-identifiers]
windo = "windo"
[type.vimscript.extend-words]


@@ -961,7 +961,7 @@ class DifferentialFlashAttentionImpl(AttentionImpl):
                 "... H (two D) -> ... (H two) D",
                 two=2)
-        else: # re-use the kv cache, full attention
+        else: # reuse the kv cache, full attention
             q = q.view(-1, self.num_heads, self.head_size)
             q1, q2 = self.split_heads(q)
             # kv_cache shape is (2, num_blocks, block_size, num_kv_heads, head_size)  # noqa: E501


@@ -372,7 +372,7 @@ class OpenAIServingResponses(OpenAIServing):
             })
         # Append the new input.
-        # Reponses API supports simple text inputs without chat format.
+        # Responses API supports simple text inputs without chat format.
         if isinstance(request.input, str):
             messages.append({"role": "user", "content": request.input})
         else:


@@ -1172,7 +1172,7 @@ def fused_experts(
         allow_cutlass_block_scaled_grouped_gemm: bool = False) -> torch.Tensor:
     # For now, disable DeepGemm for small N (<= 512) until better
     # permute/unpermute ops are available.
-    # However, on B200, we use DeepGemm for all cases becuase they only support
+    # However, on B200, we use DeepGemm for all cases because they only support
     # E8M0 scale, which means we requantize the weight and input to the specific
     # scale. Fallen back to cutlass or triton for some cases would cause
     # accuracy issue.


@@ -193,7 +193,7 @@ class SambaYAttention(nn.Module):
                 ],
                 dim=-1)
             attn_output = self.attn(q, k, v)
-        else: # re-use the kv cache, full attention
+        else: # reuse the kv cache, full attention
             q = self.Wqkv(hidden_states)
             attn_output = self.attn(q, None, None)
         attn_output = attn_output.view(-1, self.num_heads * self.head_dim)


@@ -394,7 +394,7 @@ def use_cudnn_prefill() -> bool:
 # Currently 394MB, this can be tuned based on GEMM sizes used.
-# Choosen to be the same as sglang:
+# Chosen to be the same as sglang:
 # https://github.com/sgl-project/sglang/blob/766392c6bda2558b61ce6d1c1bfd8081a549e1f1/python/sglang/global_config.py#L37
 FLASHINFER_WORKSPACE_BUFFER_SIZE = 394 * 1024 * 1024


@@ -969,7 +969,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
         else:
             mm_embeds = []
         xm.mark_step()
-        # Prepare inputs, the requests might be splitted into multiple
+        # Prepare inputs, the requests might be split into multiple
         # executions, combine the result of each execution.
         start_index = 0
         combined_selected_tokens: list[torch.Tensor] = []