[Doc]: fix typos in Python comments (#24093)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
Didier Durand 2025-09-03 06:05:45 +02:00 committed by GitHub
parent c4ed78b14f
commit d7e1e59972
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 23 additions and 23 deletions

View File

@ -641,7 +641,7 @@ def test_schedule_decode_blocks_to_copy_update():
# Nothing is preempted. # Nothing is preempted.
assert output.blocks_to_swap_out == [] assert output.blocks_to_swap_out == []
# Since append_slot returns the source -> dist mapping, it should # Since append_slot returns the source -> dist mapping, it should
# applied. # be applied.
assert output.blocks_to_copy == [(2, 3)] assert output.blocks_to_copy == [(2, 3)]

View File

@ -32,7 +32,7 @@ def to_bytes(y, sr):
async def transcribe_audio(client, tokenizer, y, sr): async def transcribe_audio(client, tokenizer, y, sr):
# Send loaded audio directly instead of loading from disk, # Send loaded audio directly instead of loading from disk,
# dont account for that time though # don't account for that time though
with to_bytes(y, sr) as f: with to_bytes(y, sr) as f:
start_time = time.perf_counter() start_time = time.perf_counter()
transcription = await client.audio.transcriptions.create( transcription = await client.audio.transcriptions.create(

View File

@ -224,7 +224,7 @@ async def test_comparison_with_prompt_logprobs_and_logprobs(server):
logprobs_token_ids.append(token_id) logprobs_token_ids.append(token_id)
# When echo=True, the logprobs include both prompt and response tokens # When echo=True, the logprobs include both prompt and response tokens
# The token_ids field should match the the suffix of response portion # The token_ids field should match the suffix of response portion
# The prompt_token_ids should match the prompt portion # The prompt_token_ids should match the prompt portion
assert len(completion.choices[0].token_ids) < len(logprobs_token_ids) assert len(completion.choices[0].token_ids) < len(logprobs_token_ids)
response_token_ids_length = len(completion.choices[0].token_ids) response_token_ids_length = len(completion.choices[0].token_ids)

View File

@ -313,7 +313,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
}], }],
) )
# By default cache_salt in the engine prompt is not set # By default, cache_salt in the engine prompt is not set
with suppress(Exception): with suppress(Exception):
await serving_chat.create_chat_completion(req) await serving_chat.create_chat_completion(req)
assert "cache_salt" not in mock_engine.generate.call_args.args[0] assert "cache_salt" not in mock_engine.generate.call_args.args[0]

View File

@ -1236,7 +1236,7 @@ def baseline_scaled_mm(a: torch.Tensor,
bias: Optional[torch.Tensor] = None) -> torch.Tensor: bias: Optional[torch.Tensor] = None) -> torch.Tensor:
# We treat N-dimensional group scaling as extended numpy-style broadcasting # We treat N-dimensional group scaling as extended numpy-style broadcasting
# in numpy simply stretches dimensions with an extent of 1 to match the # in numpy simply stretches dimensions with an extent of 1 to match
# the target shape by repeating the data along that dimension (broadcasting) # the target shape by repeating the data along that dimension (broadcasting)
# , we extend these semantics to say if the extent of a dimension in the # , we extend these semantics to say if the extent of a dimension in the
# source shape is not 1 and does not match the target shape we repeat each # source shape is not 1 and does not match the target shape we repeat each

View File

@ -458,7 +458,7 @@ def run_dp_sharded_vision_model_vs_direct(local_rank: int, world_size: int,
with torch.inference_mode(): with torch.inference_mode():
sharded_output = run_dp_sharded_vision_model(image_input, vision_model) sharded_output = run_dp_sharded_vision_model(image_input, vision_model)
# Check that the world size is setup correctly # Check that the world size is set up correctly
assert get_tensor_model_parallel_world_size() == world_size assert get_tensor_model_parallel_world_size() == world_size
# Check that the outputs have the same shape # Check that the outputs have the same shape
@ -642,7 +642,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct(local_rank: int,
rope_type="rope_3d") rope_type="rope_3d")
sharded_output = torch.cat(sharded_output, dim=0) sharded_output = torch.cat(sharded_output, dim=0)
# Check that the world size is setup correctly # Check that the world size is set up correctly
assert get_tensor_model_parallel_world_size() == world_size assert get_tensor_model_parallel_world_size() == world_size
# Compare outputs (only on rank 0) # Compare outputs (only on rank 0)

View File

@ -83,7 +83,7 @@ def test_ngram_correctness(
model_name: str, model_name: str,
): ):
''' '''
Compare the outputs of a original LLM and a speculative LLM Compare the outputs of an original LLM and a speculative LLM
should be the same when using ngram speculative decoding. should be the same when using ngram speculative decoding.
''' '''
with monkeypatch.context() as m: with monkeypatch.context() as m:

View File

@ -42,7 +42,7 @@ def test_basic_lifecycle():
engine_core_outputs = scheduler.update_from_output(scheduler_output, engine_core_outputs = scheduler.update_from_output(scheduler_output,
model_runner_output) model_runner_output)
# Ensure the request is finished after 1 tokens. # Ensure the request is finished after 1 token.
assert request.is_finished() assert request.is_finished()
assert request.status == RequestStatus.FINISHED_LENGTH_CAPPED assert request.status == RequestStatus.FINISHED_LENGTH_CAPPED
output = engine_core_outputs[0].outputs[0] output = engine_core_outputs[0].outputs[0]
@ -141,7 +141,7 @@ def test_short_prompt_lifecycle():
def test_prefix_cache_lifecycle(): def test_prefix_cache_lifecycle():
"""Test that remote decode params still works with a prefix cache hit.""" """Test that remote decode params still work with a prefix cache hit."""
vllm_config = create_vllm_config() vllm_config = create_vllm_config()
scheduler = create_scheduler(vllm_config) scheduler = create_scheduler(vllm_config)

View File

@ -187,7 +187,7 @@ def test_tree_attn_correctness() -> None:
dtype=torch.bfloat16, dtype=torch.bfloat16,
) )
# Setup the block table and KV cache for paged KV. # Set up the block table and KV cache for paged KV.
assert max_sequence_length % block_size == 0 assert max_sequence_length % block_size == 0
max_blocks_per_batch = max_sequence_length // block_size max_blocks_per_batch = max_sequence_length // block_size
kv_cache = torch.randn( kv_cache = torch.randn(
@ -222,7 +222,7 @@ def test_tree_attn_correctness() -> None:
num_alloc_blocks_per_batch] = block_ids.view( num_alloc_blocks_per_batch] = block_ids.view(
-1, num_alloc_blocks_per_batch) -1, num_alloc_blocks_per_batch)
# Setup the slot mapping for the input KVs. # Set up the slot mapping for the input KVs.
tree_positions = sequence_position + torch.arange( tree_positions = sequence_position + torch.arange(
0, 0,
tree_size_q, tree_size_q,

View File

@ -239,7 +239,7 @@ def get_adapter_absolute_path(lora_path: str) -> str:
except (HfHubHTTPError, RepositoryNotFoundError, EntryNotFoundError, except (HfHubHTTPError, RepositoryNotFoundError, EntryNotFoundError,
HFValidationError): HFValidationError):
# Handle errors that may occur during the download # Handle errors that may occur during the download
# Return original path instead instead of throwing error here # Return original path instead of throwing error here
logger.exception("Error downloading the HuggingFace model") logger.exception("Error downloading the HuggingFace model")
return lora_path return lora_path

View File

@ -94,7 +94,7 @@ def find_matched_target(
config that a layer corresponds to. config that a layer corresponds to.
Recall that a compressed-tensors config has a concept of Recall that a compressed-tensors config has a concept of
config_groups, where each layer can be quantized with with a different config_groups, where each layer can be quantized with a different
scheme. scheme.
targets in each config_group will be a list of either layer names targets in each config_group will be a list of either layer names

View File

@ -213,7 +213,7 @@ class MediaConnector:
image_mode: str = "RGB", image_mode: str = "RGB",
) -> Image.Image: ) -> Image.Image:
""" """
Load a PIL image from a HTTP or base64 data URL. Load a PIL image from an HTTP or base64 data URL.
By default, the image is converted into RGB format. By default, the image is converted into RGB format.
""" """
@ -237,7 +237,7 @@ class MediaConnector:
image_mode: str = "RGB", image_mode: str = "RGB",
) -> Image.Image: ) -> Image.Image:
""" """
Asynchronously load a PIL image from a HTTP or base64 data URL. Asynchronously load a PIL image from an HTTP or base64 data URL.
By default, the image is converted into RGB format. By default, the image is converted into RGB format.
""" """
@ -261,7 +261,7 @@ class MediaConnector:
image_mode: str = "RGB", image_mode: str = "RGB",
) -> tuple[npt.NDArray, dict[str, Any]]: ) -> tuple[npt.NDArray, dict[str, Any]]:
""" """
Load video from a HTTP or base64 data URL. Load video from an HTTP or base64 data URL.
""" """
image_io = ImageMediaIO(image_mode=image_mode, image_io = ImageMediaIO(image_mode=image_mode,
**self.media_io_kwargs.get("image", {})) **self.media_io_kwargs.get("image", {}))
@ -281,7 +281,7 @@ class MediaConnector:
image_mode: str = "RGB", image_mode: str = "RGB",
) -> tuple[npt.NDArray, dict[str, Any]]: ) -> tuple[npt.NDArray, dict[str, Any]]:
""" """
Asynchronously load video from a HTTP or base64 data URL. Asynchronously load video from an HTTP or base64 data URL.
By default, the image is converted into RGB format. By default, the image is converted into RGB format.
""" """
@ -370,7 +370,7 @@ def group_mm_inputs_by_modality(
def modality_group_func( def modality_group_func(
mm_input: MultiModalKwargsItems) -> Union[str, int]: mm_input: MultiModalKwargsItems) -> Union[str, int]:
# If the input has multiple modalities, return a id as the unique key # If the input has multiple modalities, return an id as the unique key
# for the mm_input. # for the mm_input.
if len(mm_input) > 1: if len(mm_input) > 1:
return id(mm_input) return id(mm_input)

View File

@ -709,7 +709,7 @@ def reorder_batch_to_split_decodes_and_prefills(
for i, req_id in enumerate(input_batch.req_ids): for i, req_id in enumerate(input_batch.req_ids):
num_tokens = scheduler_output.num_scheduled_tokens[req_id] num_tokens = scheduler_output.num_scheduled_tokens[req_id]
# for now treat 1 scheduled token as "decode" even if its not, # for now treat 1 scheduled token as "decode" even if it's not,
# we should update this to something like < 8 in the future but # we should update this to something like < 8 in the future but
# currently the TritonMLA._forward_decode only supports # currently the TritonMLA._forward_decode only supports
# num_tokens = 1 # num_tokens = 1

View File

@ -65,9 +65,9 @@ def get_outlines_cache_path() -> str:
elif xdg_cache_home: elif xdg_cache_home:
return os.path.join(xdg_cache_home, ".cache", "outlines") return os.path.join(xdg_cache_home, ".cache", "outlines")
# If homedir is "/", we may be inside a container, and thus writing to # If homedir is "/", we may be inside a container, and thus writing to
# root would be problematic, so we fallback to using a tempfile. # root would be problematic, so we fall back to using a tempfile.
# Also validate the path exists, since os.path.expanduser does # Also validate the path exists, since os.path.expanduser does
# not garuntee existence. # not guarantee existence.
elif os.path.isdir(home_dir) and home_dir != "/": elif os.path.isdir(home_dir) and home_dir != "/":
# Default Unix fallback: ~/.cache/outlines # Default Unix fallback: ~/.cache/outlines
return os.path.join(home_dir, ".cache", "outlines") return os.path.join(home_dir, ".cache", "outlines")

View File

@ -250,7 +250,7 @@ class TPUWorker:
scheduler_output: "SchedulerOutput", scheduler_output: "SchedulerOutput",
) -> Optional[ModelRunnerOutput]: ) -> Optional[ModelRunnerOutput]:
output = self.model_runner.execute_model(scheduler_output) output = self.model_runner.execute_model(scheduler_output)
# every worker's output is needed when kv_transfer_group is setup # every worker's output is needed when kv_transfer_group is set up
return output if self.is_driver_worker or has_kv_transfer_group( return output if self.is_driver_worker or has_kv_transfer_group(
) else None ) else None