diff --git a/tests/compile/test_basic_correctness.py b/tests/compile/test_basic_correctness.py
index f6783704342f6..fd2b1866e62e1 100644
--- a/tests/compile/test_basic_correctness.py
+++ b/tests/compile/test_basic_correctness.py
@@ -23,7 +23,7 @@ class TestSetting:
     fullgraph: bool


-# we cannot afford testing the full Catesian product
+# we cannot afford to test the full Cartesian product
 # of all models and all levels
 @pytest.mark.parametrize(
     "test_setting",
diff --git a/tests/kernels/mamba/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py
index 1ce7f9d85e876..fc60d5ac82b27 100644
--- a/tests/kernels/mamba/test_mamba_ssm_ssd.py
+++ b/tests/kernels/mamba/test_mamba_ssm_ssd.py
@@ -345,7 +345,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens):
    # in the mamba2 ssd kernels, by comparing concatenation (in the sequence
    # dimension) of chunked results with the full sequence result.
    # It is different from test_mamba_chunk_scan_cont_batch by:
-    # 1. Not using the naive torch implementaion (ssd_minimal_discrete) to get
+    # 1. Not using the naive torch implementation (ssd_minimal_discrete) to get
    # reference outputs. Instead, it compares chunked kernel outputs to full
    # sequence kernel outputs. This is the most straightforward way to
    # assert chunked prefill correctness.
diff --git a/vllm/model_executor/layers/fla/ops/cumsum.py b/vllm/model_executor/layers/fla/ops/cumsum.py
index 59152e2c845ad..370a45fe16358 100644
--- a/vllm/model_executor/layers/fla/ops/cumsum.py
+++ b/vllm/model_executor/layers/fla/ops/cumsum.py
@@ -179,7 +179,7 @@ def chunk_local_cumsum_vector(
    def grid(meta):
        return (triton.cdiv(meta['S'], meta['BS']), NT, B * H)

-    # keep cummulative normalizer in fp32
+    # keep cumulative normalizer in fp32
    # this kernel is equivalent to
    # g = g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1)
    chunk_local_cumsum_vector_kernel[grid](g_org,
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index 5308a1113a1ad..036a281f1d26e 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -1322,7 +1322,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
        k_scale: torch.Tensor,
        dcp_world_size: int,
    ):
-        assert k_scale is None, "DCP not support sacled kvcache now."
+        assert k_scale is None, "DCP does not support scaled kvcache yet."
        assert attn_metadata.prefill is not None
        prefill_metadata = attn_metadata.prefill
        assert prefill_metadata.chunked_context is not None
diff --git a/vllm/v1/worker/block_table.py b/vllm/v1/worker/block_table.py
index 1901de6d2e5b3..194984bf50536 100644
--- a/vllm/v1/worker/block_table.py
+++ b/vllm/v1/worker/block_table.py
@@ -112,9 +112,9 @@ class BlockTable:
        # tokens.
        virtual_block_offsets = positions % virtual_block_size
        mask = virtual_block_offsets % self.dcp_world_size == self.dcp_rank
-        # Calcuate local block_offsets
+        # Calculate local block_offsets
        block_offsets = virtual_block_offsets // self.dcp_world_size
-        # Calcuate slot_mapping
+        # Calculate slot_mapping
        slot_mapping = block_numbers * self.block_size + block_offsets
        # Write final slots, use -1 for not-local
        self.slot_mapping_np[:req_indices.shape[0]] = np.where(
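Beyond the spelling fix, the cumsum.py comment states an equivalence worth making concrete: the Triton kernel computes a chunk-local cumulative sum that matches g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1). The following is a minimal PyTorch sketch (sizes are made up, not from the patch) that checks the reshape-based form against a naive per-chunk loop:

    import torch

    # Hypothetical sizes; T must equal NT * BT for the view to be valid.
    B, H, NT, BT, S = 2, 3, 4, 8, 16
    T = NT * BT
    g = torch.randn(B, H, T, S)

    # Reshape into NT chunks of length BT, cumsum within each chunk, flatten back.
    ref = g.view(B, H, NT, BT, S).cumsum(-2).view(B, H, T, S)

    # Naive reference: cumsum each chunk separately, then concatenate.
    out = torch.cat(
        [g[:, :, i * BT:(i + 1) * BT].cumsum(-2) for i in range(NT)], dim=2)

    torch.testing.assert_close(out, ref)

The "keep cumulative normalizer in fp32" comment is about running this accumulation in float32 even when g arrives in a lower precision, since long within-chunk sums lose accuracy in fp16/bf16.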
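Similarly, the block_table.py hunk sits inside the decode-context-parallel (DCP) slot-mapping arithmetic visible in its context lines. The sketch below replays that arithmetic in plain NumPy with assumed values; in particular, virtual_block_size = block_size * dcp_world_size and the all-zero block_numbers are assumptions for illustration, not something shown in the hunk:

    import numpy as np

    block_size = 4
    dcp_world_size = 2
    dcp_rank = 0
    # Assumed: one "virtual" block spans the stripes of all DCP ranks.
    virtual_block_size = block_size * dcp_world_size

    positions = np.arange(10)                     # token positions in the sequence
    block_numbers = np.zeros(10, dtype=np.int64)  # hypothetical: all in physical block 0

    # Offset of each token within its virtual block.
    virtual_block_offsets = positions % virtual_block_size
    # A token is local iff its offset falls on this rank's stripe.
    mask = virtual_block_offsets % dcp_world_size == dcp_rank
    # Calculate local block_offsets
    block_offsets = virtual_block_offsets // dcp_world_size
    # Calculate slot_mapping
    slot_mapping = block_numbers * block_size + block_offsets

    # Write final slots, use -1 for not-local.
    print(np.where(mask, slot_mapping, -1))
    # rank 0 keeps the even-offset tokens: [ 0 -1  1 -1  2 -1  3 -1  0 -1]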