mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 02:35:41 +08:00
[Doc]: fix typos in Python comments (#24077)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
parent
56d04089ef
commit
fad73be1a5
@ -98,7 +98,7 @@ def test_api_server(api_server, distributed_executor_backend: str):
|
|||||||
pool.join()
|
pool.join()
|
||||||
|
|
||||||
# check cancellation stats
|
# check cancellation stats
|
||||||
# give it some times to update the stats
|
# give it some time to update the stats
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
num_aborted_requests = requests.get(
|
num_aborted_requests = requests.get(
|
||||||
|
|||||||
@ -439,10 +439,10 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator,
|
|||||||
@pytest.mark.parametrize("seed", [1])
|
@pytest.mark.parametrize("seed", [1])
|
||||||
def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator,
|
def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator,
|
||||||
test_llm_generator):
|
test_llm_generator):
|
||||||
"""Verify block manager v2 with auto prefix caching could works normal
|
"""Verify block manager v2 with auto prefix caching could work normally
|
||||||
even when eviction started.
|
even when eviction started.
|
||||||
With APC enabled, all blocks are held by native block at the beginning.
|
With APC enabled, all blocks are held by native block at the beginning.
|
||||||
Then blocks are managed by evictor instead. If cache hit at the evitor's
|
Then blocks are managed by evictor instead. If cache hit at the evictor's
|
||||||
block, then it could be reused, or we need to recompute its kv cache.
|
block, then it could be reused, or we need to recompute its kv cache.
|
||||||
"""
|
"""
|
||||||
output_len = 10
|
output_len = 10
|
||||||
|
|||||||
@ -167,7 +167,7 @@ def test_get_kwargs():
|
|||||||
# dict should have json tip in help
|
# dict should have json tip in help
|
||||||
json_tip = "Should either be a valid JSON string or JSON keys"
|
json_tip = "Should either be a valid JSON string or JSON keys"
|
||||||
assert json_tip in kwargs["json_tip"]["help"]
|
assert json_tip in kwargs["json_tip"]["help"]
|
||||||
# nested config should should construct the nested config
|
# nested config should construct the nested config
|
||||||
assert kwargs["nested_config"]["type"]('{"field": 2}') == NestedConfig(2)
|
assert kwargs["nested_config"]["type"]('{"field": 2}') == NestedConfig(2)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -282,7 +282,7 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor,
|
|||||||
a1_scale=a1_scale,
|
a1_scale=a1_scale,
|
||||||
block_shape=block_shape,
|
block_shape=block_shape,
|
||||||
# Make sure this is set to False so we
|
# Make sure this is set to False so we
|
||||||
# dont end up comparing the same implementation.
|
# don't end up comparing the same implementation.
|
||||||
allow_deep_gemm=False)
|
allow_deep_gemm=False)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -59,10 +59,10 @@ async def requests_processing_time(llm,
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_add_lora(chatglm3_lora_files):
|
async def test_add_lora(chatglm3_lora_files):
|
||||||
"""
|
"""
|
||||||
The add_lora function is used to pre-load some LoRA adapters into the
|
The add_lora function is used to preload some LoRA adapters into the
|
||||||
engine in anticipation of future requests using these adapters. To test
|
engine in anticipation of future requests using these adapters. To test
|
||||||
this functionality, we use the async engine to process some requests - We
|
this functionality, we use the async engine to process some requests - We
|
||||||
do it twice, once with add_lora() pre-loading and once without.
|
do it twice, once with add_lora() preloading and once without.
|
||||||
|
|
||||||
We measure the request processing time in both cases and expect the time
|
We measure the request processing time in both cases and expect the time
|
||||||
to be lesser in the case with add_lora() calls.
|
to be lesser in the case with add_lora() calls.
|
||||||
|
|||||||
@ -18,7 +18,7 @@ def test_allowed_token_ids_with_lora_vocab(llama_2_7b_base_huggingface_id,
|
|||||||
adapters that define additional tokens.
|
adapters that define additional tokens.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Setup a base model compatible with the sql_lora_files adapter and
|
# Set up a base model compatible with the sql_lora_files adapter and
|
||||||
# a known number of tokens in the base model.
|
# a known number of tokens in the base model.
|
||||||
model_config = ModelConfig(
|
model_config = ModelConfig(
|
||||||
model=llama_2_7b_base_huggingface_id,
|
model=llama_2_7b_base_huggingface_id,
|
||||||
@ -84,7 +84,7 @@ def test_allowed_token_ids_with_lora_adapter_no_vocab(
|
|||||||
adapters that do not define additional tokens.
|
adapters that do not define additional tokens.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Setup a base model compatible with the qwen25vl_lora_files adapter and
|
# Set up a base model compatible with the qwen25vl_lora_files adapter and
|
||||||
# a known number of tokens in the base model.
|
# a known number of tokens in the base model.
|
||||||
model_config = ModelConfig(
|
model_config = ModelConfig(
|
||||||
model=qwen25vl_base_huggingface_id,
|
model=qwen25vl_base_huggingface_id,
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from ...registry import HF_EXAMPLE_MODELS
|
|||||||
from ...utils import check_logprobs_close
|
from ...utils import check_logprobs_close
|
||||||
|
|
||||||
# These have unsupported head_dim for FA. We do not
|
# These have unsupported head_dim for FA. We do not
|
||||||
# not have a clean way to fall back, so we fail with
|
# have a clean way to fall back, so we fail with
|
||||||
# a clear msg when it happens.
|
# a clear msg when it happens.
|
||||||
# https://github.com/vllm-project/vllm/issues/14524
|
# https://github.com/vllm-project/vllm/issues/14524
|
||||||
REQUIRES_V0 = ["microsoft/phi-2", "stabilityai/stablelm-3b-4e1t"]
|
REQUIRES_V0 = ["microsoft/phi-2", "stabilityai/stablelm-3b-4e1t"]
|
||||||
|
|||||||
@ -20,7 +20,7 @@ MISTRAL_FORMAT_MODELS = [
|
|||||||
"mistralai/Mistral-7B-Instruct-v0.3",
|
"mistralai/Mistral-7B-Instruct-v0.3",
|
||||||
# uses the v3-Tekken tokenizer
|
# uses the v3-Tekken tokenizer
|
||||||
"mistralai/Ministral-8B-Instruct-2410",
|
"mistralai/Ministral-8B-Instruct-2410",
|
||||||
# Mistral-Nemo is to big for CI, but passes locally
|
# Mistral-Nemo is too big for CI, but passes locally
|
||||||
# "mistralai/Mistral-Nemo-Instruct-2407"
|
# "mistralai/Mistral-Nemo-Instruct-2407"
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -273,7 +273,7 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_mistral_function_call_nested_json():
|
def test_mistral_function_call_nested_json():
|
||||||
"""Ensure that the function-name regex captures the entire outer-most
|
"""Ensure that the function-name regex captures the entire outermost
|
||||||
JSON block, including nested braces."""
|
JSON block, including nested braces."""
|
||||||
|
|
||||||
# Create a minimal stub tokenizer that provides the few attributes the
|
# Create a minimal stub tokenizer that provides the few attributes the
|
||||||
|
|||||||
@ -154,7 +154,7 @@ def batch_make_image_embeddings(
|
|||||||
embed_counter += cur_batch_embed_len
|
embed_counter += cur_batch_embed_len
|
||||||
image_counter += cur_batch_image_count
|
image_counter += cur_batch_image_count
|
||||||
|
|
||||||
# ensure we don't lost any images or embeddings
|
# ensure we don't lose any images or embeddings
|
||||||
assert embed_counter == image_embeds.size(0)
|
assert embed_counter == image_embeds.size(0)
|
||||||
assert image_counter == image_grid_thw.size(0)
|
assert image_counter == image_grid_thw.size(0)
|
||||||
assert len(image_batches) == len(result)
|
assert len(image_batches) == len(result)
|
||||||
@ -238,7 +238,7 @@ def batch_make_video_embeddings(
|
|||||||
embed_counter += cur_batch_embed_len
|
embed_counter += cur_batch_embed_len
|
||||||
video_counter += cur_batch_video_count
|
video_counter += cur_batch_video_count
|
||||||
|
|
||||||
# ensure we don't lost any videos or embeddings
|
# ensure we don't lose any videos or embeddings
|
||||||
assert embed_counter == video_embeds.size(0)
|
assert embed_counter == video_embeds.size(0)
|
||||||
assert video_counter == video_grid_thw.size(0)
|
assert video_counter == video_grid_thw.size(0)
|
||||||
assert len(video_batches) == len(result)
|
assert len(video_batches) == len(result)
|
||||||
|
|||||||
@ -247,7 +247,7 @@ def test_free_kv_cache_block_queue_append_n():
|
|||||||
|
|
||||||
def test_free_kv_cache_block_queue_popleft_n():
|
def test_free_kv_cache_block_queue_popleft_n():
|
||||||
blocks = [KVCacheBlock(block_id=i) for i in range(6)]
|
blocks = [KVCacheBlock(block_id=i) for i in range(6)]
|
||||||
# Create a empty FreeKVCacheBlockQueue with these blocks
|
# Create an empty FreeKVCacheBlockQueue with these blocks
|
||||||
queue = FreeKVCacheBlockQueue(
|
queue = FreeKVCacheBlockQueue(
|
||||||
[blocks[1], blocks[3], blocks[5], blocks[4], blocks[0], blocks[2]])
|
[blocks[1], blocks[3], blocks[5], blocks[4], blocks[0], blocks[2]])
|
||||||
assert queue.num_free_blocks == 6
|
assert queue.num_free_blocks == 6
|
||||||
|
|||||||
@ -27,7 +27,7 @@ class CustomMultiprocExecutor(MultiprocExecutor):
|
|||||||
kwargs: Optional[dict] = None,
|
kwargs: Optional[dict] = None,
|
||||||
non_block: bool = False,
|
non_block: bool = False,
|
||||||
unique_reply_rank: Optional[int] = None) -> list[Any]:
|
unique_reply_rank: Optional[int] = None) -> list[Any]:
|
||||||
# Drop marker to show that this was ran
|
# Drop marker to show that this was run
|
||||||
with open(".marker", "w"):
|
with open(".marker", "w"):
|
||||||
...
|
...
|
||||||
return super().collective_rpc(method, timeout, args, kwargs)
|
return super().collective_rpc(method, timeout, args, kwargs)
|
||||||
|
|||||||
@ -183,7 +183,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
|
|||||||
mock_pp_group.world_size = pp_size
|
mock_pp_group.world_size = pp_size
|
||||||
mock_get_pp_group.return_value = mock_pp_group
|
mock_get_pp_group.return_value = mock_pp_group
|
||||||
|
|
||||||
# Setup the target model mock with a custom class so that
|
# Set up the target model mock with a custom class so that
|
||||||
# isinstance() checks match the expected type.
|
# isinstance() checks match the expected type.
|
||||||
class _TargetModelStub(LlamaForCausalLM):
|
class _TargetModelStub(LlamaForCausalLM):
|
||||||
model: mock.MagicMock
|
model: mock.MagicMock
|
||||||
|
|||||||
@ -30,7 +30,7 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Layers 0 and 1 both belong in KV cache group 0
|
# Layers 0 and 1 both belong in KV cache group 0
|
||||||
# However, if they have have different attention backends, they will be
|
# However, if they have different attention backends, they will be
|
||||||
# placed in different attention groups for KV cache group 0
|
# placed in different attention groups for KV cache group 0
|
||||||
kv_cache_groups = [
|
kv_cache_groups = [
|
||||||
KVCacheGroupSpec(["model.layers.0", "model.layers.1"],
|
KVCacheGroupSpec(["model.layers.0", "model.layers.1"],
|
||||||
|
|||||||
@ -702,7 +702,7 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
|
|||||||
KVCacheTensors for the attention and mamba layers
|
KVCacheTensors for the attention and mamba layers
|
||||||
(via _reshape_kv_cache_tensors function). This test verifies
|
(via _reshape_kv_cache_tensors function). This test verifies
|
||||||
that the views are compatible: writing a mamba block
|
that the views are compatible: writing a mamba block
|
||||||
will not corrupt an attention block and vice-versa
|
will not corrupt an attention block and vice versa
|
||||||
'''
|
'''
|
||||||
|
|
||||||
current_platform.seed_everything(42)
|
current_platform.seed_everything(42)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user