diff --git a/tests/async_engine/test_api_server.py b/tests/async_engine/test_api_server.py index 76c94bdf80ca8..90f63e7ea17db 100644 --- a/tests/async_engine/test_api_server.py +++ b/tests/async_engine/test_api_server.py @@ -98,7 +98,7 @@ def test_api_server(api_server, distributed_executor_backend: str): pool.join() # check cancellation stats - # give it some times to update the stats + # give it some time to update the stats time.sleep(1) num_aborted_requests = requests.get( diff --git a/tests/core/block/e2e/test_correctness.py b/tests/core/block/e2e/test_correctness.py index 93222b564ebe7..8de48ef59a013 100644 --- a/tests/core/block/e2e/test_correctness.py +++ b/tests/core/block/e2e/test_correctness.py @@ -439,10 +439,10 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator, @pytest.mark.parametrize("seed", [1]) def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator, test_llm_generator): - """Verify block manager v2 with auto prefix caching could works normal + """Verify block manager v2 with auto prefix caching could work normally even when eviction started. With APC enabled, all blocks are held by native block at the beginning. - Then blocks are managed by evictor instead. If cache hit at the evitor's + Then blocks are managed by evictor instead. If cache hit at the evictor's block, then it could be reused, or we need to recompute its kv cache. """ output_len = 10 diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py index 93ac18dfcc7b4..ba8e31a79feb5 100644 --- a/tests/engine/test_arg_utils.py +++ b/tests/engine/test_arg_utils.py @@ -167,7 +167,7 @@ def test_get_kwargs(): # dict should have json tip in help json_tip = "Should either be a valid JSON string or JSON keys" assert json_tip in kwargs["json_tip"]["help"] - # nested config should should construct the nested config + # nested config should construct the nested config assert kwargs["nested_config"]["type"]('{"field": 2}') == NestedConfig(2) diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py index 36a98522a6588..6558cab6a9eff 100644 --- a/tests/kernels/moe/test_deepep_deepgemm_moe.py +++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py @@ -282,7 +282,7 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor, a1_scale=a1_scale, block_shape=block_shape, # Make sure this is set to False so we - # dont end up comparing the same implementation. + # don't end up comparing the same implementation. allow_deep_gemm=False) diff --git a/tests/lora/test_add_lora.py b/tests/lora/test_add_lora.py index 44755c603f281..35d0245759154 100644 --- a/tests/lora/test_add_lora.py +++ b/tests/lora/test_add_lora.py @@ -59,10 +59,10 @@ async def requests_processing_time(llm, @pytest.mark.asyncio async def test_add_lora(chatglm3_lora_files): """ - The add_lora function is used to pre-load some LoRA adapters into the + The add_lora function is used to preload some LoRA adapters into the engine in anticipation of future requests using these adapters. To test this functionality, we use the async engine to process some requests - We - do it twice, once with add_lora() pre-loading and once without. + do it twice, once with add_lora() preloading and once without. We measure the request processing time in both cases and expect the time to be lesser in the case with add_lora() calls. diff --git a/tests/lora/test_lora_allowed_token_ids.py b/tests/lora/test_lora_allowed_token_ids.py index 01bc102bd112b..e77eae70445db 100644 --- a/tests/lora/test_lora_allowed_token_ids.py +++ b/tests/lora/test_lora_allowed_token_ids.py @@ -18,7 +18,7 @@ def test_allowed_token_ids_with_lora_vocab(llama_2_7b_base_huggingface_id, adapters that define additional tokens. """ - # Setup a base model compatible with the sql_lora_files adapter and + # Set up a base model compatible with the sql_lora_files adapter and # a known number of tokens in the base model. model_config = ModelConfig( model=llama_2_7b_base_huggingface_id, @@ -84,7 +84,7 @@ def test_allowed_token_ids_with_lora_adapter_no_vocab( adapters that do not define additional tokens. """ - # Setup a base model compatible with the qwen25vl_lora_files adapter and + # Set up a base model compatible with the qwen25vl_lora_files adapter and # a known number of tokens in the base model. model_config = ModelConfig( model=qwen25vl_base_huggingface_id, diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py index 4c4434c94145a..8a04946b2ffb3 100644 --- a/tests/models/language/generation/test_common.py +++ b/tests/models/language/generation/test_common.py @@ -13,7 +13,7 @@ from ...registry import HF_EXAMPLE_MODELS from ...utils import check_logprobs_close # These have unsupported head_dim for FA. We do not -# not have a clean way to fall back, so we fail with +# have a clean way to fall back, so we fail with # a clear msg when it happens. # https://github.com/vllm-project/vllm/issues/14524 REQUIRES_V0 = ["microsoft/phi-2", "stabilityai/stablelm-3b-4e1t"] diff --git a/tests/models/language/generation/test_mistral.py b/tests/models/language/generation/test_mistral.py index af51a60edfd62..845afbfa8a45e 100644 --- a/tests/models/language/generation/test_mistral.py +++ b/tests/models/language/generation/test_mistral.py @@ -20,7 +20,7 @@ MISTRAL_FORMAT_MODELS = [ "mistralai/Mistral-7B-Instruct-v0.3", # uses the v3-Tekken tokenizer "mistralai/Ministral-8B-Instruct-2410", - # Mistral-Nemo is to big for CI, but passes locally + # Mistral-Nemo is too big for CI, but passes locally # "mistralai/Mistral-Nemo-Instruct-2407" ] @@ -273,7 +273,7 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None: def test_mistral_function_call_nested_json(): - """Ensure that the function-name regex captures the entire outer-most + """Ensure that the function-name regex captures the entire outermost JSON block, including nested braces.""" # Create a minimal stub tokenizer that provides the few attributes the diff --git a/tests/models/multimodal/generation/test_qwen2_vl.py b/tests/models/multimodal/generation/test_qwen2_vl.py index c61c27ae204a3..a81f5e7ec8872 100644 --- a/tests/models/multimodal/generation/test_qwen2_vl.py +++ b/tests/models/multimodal/generation/test_qwen2_vl.py @@ -154,7 +154,7 @@ def batch_make_image_embeddings( embed_counter += cur_batch_embed_len image_counter += cur_batch_image_count - # ensure we don't lost any images or embeddings + # ensure we don't lose any images or embeddings assert embed_counter == image_embeds.size(0) assert image_counter == image_grid_thw.size(0) assert len(image_batches) == len(result) @@ -238,7 +238,7 @@ def batch_make_video_embeddings( embed_counter += cur_batch_embed_len video_counter += cur_batch_video_count - # ensure we don't lost any videos or embeddings + # ensure we don't lose any videos or embeddings assert embed_counter == video_embeds.size(0) assert video_counter == video_grid_thw.size(0) assert len(video_batches) == len(result) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index e738f2bd46472..4d0a26f76e98e 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -247,7 +247,7 @@ def test_free_kv_cache_block_queue_append_n(): def test_free_kv_cache_block_queue_popleft_n(): blocks = [KVCacheBlock(block_id=i) for i in range(6)] - # Create a empty FreeKVCacheBlockQueue with these blocks + # Create an empty FreeKVCacheBlockQueue with these blocks queue = FreeKVCacheBlockQueue( [blocks[1], blocks[3], blocks[5], blocks[4], blocks[0], blocks[2]]) assert queue.num_free_blocks == 6 diff --git a/tests/v1/executor/test_executor.py b/tests/v1/executor/test_executor.py index bdd5155c1481d..4e83e2f9d4b63 100644 --- a/tests/v1/executor/test_executor.py +++ b/tests/v1/executor/test_executor.py @@ -27,7 +27,7 @@ class CustomMultiprocExecutor(MultiprocExecutor): kwargs: Optional[dict] = None, non_block: bool = False, unique_reply_rank: Optional[int] = None) -> list[Any]: - # Drop marker to show that this was ran + # Drop marker to show that this was run with open(".marker", "w"): ... return super().collective_rpc(method, timeout, args, kwargs) diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index 7b8445a0b2878..46e3a611c6d26 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -183,7 +183,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method, mock_pp_group.world_size = pp_size mock_get_pp_group.return_value = mock_pp_group - # Setup the target model mock with a custom class so that + # Set up the target model mock with a custom class so that # isinstance() checks match the expected type. class _TargetModelStub(LlamaForCausalLM): model: mock.MagicMock diff --git a/tests/v1/test_kv_sharing.py b/tests/v1/test_kv_sharing.py index 6b01b7d3e1d6c..96848047145b6 100644 --- a/tests/v1/test_kv_sharing.py +++ b/tests/v1/test_kv_sharing.py @@ -30,7 +30,7 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups(): } # Layers 0 and 1 both belong in KV cache group 0 - # However, if they have have different attention backends, they will be + # However, if they have different attention backends, they will be # placed in different attention groups for KV cache group 0 kv_cache_groups = [ KVCacheGroupSpec(["model.layers.0", "model.layers.1"], diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index d6cd03fb01a73..6d99029e404ef 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -702,7 +702,7 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch): KVCacheTensors for the attention and mamba layers (via _reshape_kv_cache_tensors function). This test verifies that the views are compatible: writing a mamba block - will not corrupt an attention block and vice-versa + will not corrupt an attention block and vice versa ''' current_platform.seed_everything(42)