[Doc]: fix typos in Python comments (#24042)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>

parent a344a5aa0a
commit 0235103cbb
@@ -78,7 +78,7 @@ class QuickAllReduce:
         group: the process group to work on. If None, it will use the
             default process group.
         device: the device to bind the CustomAllreduce to. If None,
-            it will be bind to f"cuda:{local_rank}".
+            it will be bound to f"cuda:{local_rank}".
         It is the caller's responsibility to make sure each communicator
         is bind to a unique device, and all communicators in this group
         are in the same node.
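The corrected line documents the default device binding. Below is a minimal sketch of that fallback, assuming only that local_rank is the process's GPU index on the node; the helper name is hypothetical and not part of vLLM.

```python
from typing import Optional, Union

import torch


def resolve_device(device: Optional[Union[int, str, torch.device]],
                   local_rank: int) -> torch.device:
    # Illustrative fallback: when no device is given, bind to the GPU
    # matching the local rank, mirroring the documented default.
    if device is None:
        return torch.device(f"cuda:{local_rank}")
    if isinstance(device, int):
        return torch.device(f"cuda:{device}")
    return torch.device(device)
```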
@@ -186,7 +186,7 @@ class RayPPCommunicator(Communicator):
         """
         Receive a torch.Tensor from a peer and synchronize the current stream.

-        After this call returns, the receive buffer is safe to read from from
+        After this call returns, the receive buffer is safe to read from
         any stream. An RayChannelError will be raised if an error occurred
         (e.g., remote actor died), and the buffer is not safe to read.
@@ -161,7 +161,7 @@ async def write_local_file(output_path: str,
         batch_outputs: The list of batch outputs to write.
     """
     # We should make this async, but as long as run_batch runs as a
-    # standalone program, blocking the event loop won't effect performance.
+    # standalone program, blocking the event loop won't affect performance.
     with open(output_path, "w", encoding="utf-8") as f:
         for o in batch_outputs:
             print(o.model_dump_json(), file=f)
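The comment keeps the write intentionally blocking. For illustration, a minimal sketch of how the same write could be pushed off the event loop with asyncio.to_thread; this is an assumption-level alternative, not the change made in this commit.

```python
import asyncio


async def write_local_file_async(output_path: str, batch_outputs) -> None:
    # Offload the blocking file I/O to a worker thread so the event loop
    # stays responsive; functionally equivalent to the synchronous loop
    # shown in the diff above.
    def _write() -> None:
        with open(output_path, "w", encoding="utf-8") as f:
            for o in batch_outputs:
                print(o.model_dump_json(), file=f)

    await asyncio.to_thread(_write)
```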
@@ -728,7 +728,7 @@ class OpenAIServingResponses(OpenAIServing):
             for response_msg in request.input:
                 messages.append(
                     parse_response_input(response_msg, prev_outputs))
-                # User passes in a a tool call request and its output. We need
+                # User passes in a tool call request and its output. We need
                 # to add the tool call request to prev_outputs so that the
                 # parse_response_input can find the tool call request when
                 # parsing the tool call output.
@@ -223,7 +223,7 @@ def _wait_until_pg_ready(current_placement_group: "PlacementGroup"):
     """
     # Wait until PG is ready - this will block until all
-    # requested resources are available, and will timeout
+    # requested resources are available, and will time out
     # if they cannot be provisioned.
     placement_group_specs = current_placement_group.bundle_specs
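For context on the block-then-time-out behavior the comment describes, here is a minimal sketch of the usual Ray pattern, assuming a standard placement group; it is illustrative and not the body of _wait_until_pg_ready.

```python
import ray
from ray.exceptions import GetTimeoutError
from ray.util.placement_group import placement_group

ray.init()

# Two bundles, each asking for one CPU and one GPU (illustrative sizes).
pg = placement_group([{"CPU": 1, "GPU": 1}] * 2)
try:
    # pg.ready() returns an ObjectRef that resolves once every bundle has
    # been placed; ray.get blocks until then and raises GetTimeoutError if
    # the resources cannot be provisioned within the timeout.
    ray.get(pg.ready(), timeout=30)
except GetTimeoutError:
    print("Placement group could not be provisioned:", pg.bundle_specs)
```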
@@ -116,7 +116,7 @@ def _normalize_quant_group_shape(x: torch.Tensor, group_shape: GroupShape):
 # then we would expand a to:
 # a = [[1, 1, 2, 2],
 #      [3, 3, 4, 4]]
-# NOTE this function this function does not explicitly broadcast dimensions
+# NOTE this function does not explicitly broadcast dimensions
 # with an extent of 1, since this can be done implicitly by pytorch
 def group_broadcast(t, shape):
     for i, s in enumerate(shape):
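The comment's example (expanding [[1, 2], [3, 4]] to [[1, 1, 2, 2], [3, 3, 4, 4]]) can be reproduced with plain PyTorch. Here is a minimal sketch of one way to do that group-wise expansion; the helper below is hypothetical and not the actual group_broadcast body.

```python
import torch


def expand_to_group_shape(t: torch.Tensor, shape: tuple) -> torch.Tensor:
    # Repeat each element along every dimension by the ratio between the
    # target extent and the current extent, e.g. (2, 2) -> (2, 4) repeats
    # each column twice.
    for dim, target in enumerate(shape):
        if t.shape[dim] != target:
            t = t.repeat_interleave(target // t.shape[dim], dim=dim)
    return t


a = torch.tensor([[1, 2], [3, 4]])
print(expand_to_group_shape(a, (2, 4)))
# tensor([[1, 1, 2, 2],
#         [3, 3, 4, 4]])
```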
@@ -185,7 +185,7 @@ _EMBEDDING_MODELS = {
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically PrithviGeoSpatialMAE is a model that works on images, both in
-    # input and output. I am adding it here because it piggy-backs on embedding
+    # input and output. I am adding it here because it piggybacks on embedding
     # models for the time being.
     "PrithviGeoSpatialMAE": ("prithvi_geospatial_mae", "PrithviGeoSpatialMAE"),
 }
@@ -97,7 +97,7 @@ class SamplingMetadataCache:
 class SamplingMetadata:
     """Metadata for input sequences. Used in sampler.

-    The usage is as follow;
+    The usage is as follows;
     ```
     hidden_states = execute_model(...)
     logits = hidden_states[sampling_metadata.selected_token_indices]
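The docstring's snippet gathers hidden states at selected_token_indices before computing logits. Below is a small standalone illustration of that indexing step with hypothetical shapes; it is not the sampler itself.

```python
import torch

# Hypothetical shapes: 6 tokens in the flattened batch, hidden size 4.
hidden_states = torch.randn(6, 4)
# Indices of the tokens whose next-token logits are actually needed,
# e.g. the last token of each sequence in the batch.
selected_token_indices = torch.tensor([2, 5])

# Row-gather: keeps only the positions that feed the sampler.
selected = hidden_states[selected_token_indices]
print(selected.shape)  # torch.Size([2, 4])
```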
@@ -269,7 +269,7 @@ class ScalarType:

     @classmethod
     def uint(cls, size_bits: int, bias: Optional[int]) -> 'ScalarType':
-        """Create a unsigned integer scalar type."""
+        """Create an unsigned integer scalar type."""
         ret = cls(0, size_bits, False, bias if bias else 0)
         ret.id  # noqa B018: make sure the id is cached
         return ret
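A brief usage sketch of the classmethod whose docstring was fixed. The import path and the attribute access are assumptions for illustration; the sizes and bias are arbitrary example values.

```python
# Assumed import path for illustration.
from vllm.scalar_type import ScalarType

# Plain unsigned 8-bit integer type (no bias).
u8 = ScalarType.uint(8, None)

# Unsigned 4-bit integer stored with a bias of 8, so stored values 0..15
# represent -8..7 after the bias is subtracted.
u4b8 = ScalarType.uint(4, 8)
```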
@@ -1193,7 +1193,7 @@ class HiddenStates(msgspec.Struct, array_like=True,
     seq_ids are the sequence ids of each entry of the batch
     dimension of the hidden_states tensor"""
     # Scorer hidden states. For prefill step, it is used for hidden states of
-    # all tokens, whereas for decode step, it use used for last accepted tokens.
+    # all tokens, whereas for decode step, it is used for last accepted tokens.
     hidden_states: torch.Tensor
     # The sequence group metadata list. Only needed for decode step.
     seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None
@@ -815,7 +815,7 @@ class Scheduler(SchedulerInterface):
         # NOTE: structured_output_request_ids maps
         # a request's (request that uses structured output)
         # request_id to its index in the batch.
-        # This will helps us determine to slice the grammar bitmask
+        # This will help us determine to slice the grammar bitmask
         # and only applies valid mask for requests that
         # uses structured decoding.
         structured_output_request_ids: dict[str, int] = {}
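For intuition on the request_id-to-batch-index map the comment describes, here is a small standalone sketch of applying a grammar bitmask only to the requests that use structured decoding; the names, shapes, and values are hypothetical and this is not the scheduler's actual code.

```python
import torch

# Hypothetical batch of four requests; the requests at batch indices 0 and
# 2 use structured output. The dict mirrors request_id -> batch index.
structured_output_request_ids = {"req-a": 0, "req-c": 2}

vocab_size = 8
logits = torch.randn(4, vocab_size)

# One bitmask row per structured-output request, in the same order as the
# dict above; True means the token is allowed by the grammar.
grammar_bitmask = torch.tensor([
    [1, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 0, 0],
], dtype=torch.bool)

# Apply each mask row only to the logits of the request it belongs to.
for row, batch_index in enumerate(structured_output_request_ids.values()):
    logits[batch_index][~grammar_bitmask[row]] = float("-inf")
```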
@@ -923,7 +923,7 @@ class Scheduler(SchedulerInterface):
                     request):
                 # NOTE: structured_output_request
                 # should not be None if use_structured_output, we have
-                # check above, so safe to ignore type warning
+                # checked above, so safe to ignore type warning
                 request.structured_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
                     req_id, new_token_ids)
@@ -1242,7 +1242,7 @@ class Scheduler(SchedulerInterface):
          finished_sending reqs to the output.
        * if finished_sending: free the blocks
        # if finished_recving: add to state so we can
-          scheduler the request during the next step.
+          schedule the request during the next step.
        """

        if self.connector is not None:
@@ -59,7 +59,7 @@ class RequestStateStats:

     num_generation_tokens: int = 0

-    # This is a engine frontend timestamp (wall-clock)
+    # This is an engine frontend timestamp (wall-clock)
     arrival_time: float = 0.0

     # These are engine core timestamps (monotonic)
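The corrected comment distinguishes a wall-clock frontend timestamp from monotonic engine-core timestamps. A minimal sketch of why that split matters: durations should come from the monotonic clock, which never jumps backwards. The variable names below are illustrative, not the actual stats fields.

```python
import time

# Wall-clock timestamp, suitable for reporting when a request arrived
# (it can jump if the system clock is adjusted).
arrival_time = time.time()

# Monotonic timestamps, suitable for measuring durations inside the engine.
queued_ts = time.monotonic()
# ... request waits in the queue ...
scheduled_ts = time.monotonic()

queue_wait_s = scheduled_ts - queued_ts
print(f"arrived at {arrival_time:.0f} (epoch s), waited {queue_wait_s:.6f} s")
```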