mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 02:43:29 +08:00
[Doc]: fix typos in Python comments (#24042)
Signed-off-by: Didier Durand <durand.didier@gmail.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
a344a5aa0a
commit
0235103cbb
@ -78,7 +78,7 @@ class QuickAllReduce:
|
|||||||
group: the process group to work on. If None, it will use the
|
group: the process group to work on. If None, it will use the
|
||||||
default process group.
|
default process group.
|
||||||
device: the device to bind the CustomAllreduce to. If None,
|
device: the device to bind the CustomAllreduce to. If None,
|
||||||
it will be bind to f"cuda:{local_rank}".
|
it will be bound to f"cuda:{local_rank}".
|
||||||
It is the caller's responsibility to make sure each communicator
|
It is the caller's responsibility to make sure each communicator
|
||||||
is bound to a unique device, and all communicators in this group
|
is bound to a unique device, and all communicators in this group
|
||||||
are in the same node.
|
are in the same node.
|
||||||
|
|||||||
@ -186,7 +186,7 @@ class RayPPCommunicator(Communicator):
|
|||||||
"""
|
"""
|
||||||
Receive a torch.Tensor from a peer and synchronize the current stream.
|
Receive a torch.Tensor from a peer and synchronize the current stream.
|
||||||
|
|
||||||
After this call returns, the receive buffer is safe to read from from
|
After this call returns, the receive buffer is safe to read from
|
||||||
any stream. A RayChannelError will be raised if an error occurred
|
any stream. A RayChannelError will be raised if an error occurred
|
||||||
(e.g., remote actor died), and the buffer is not safe to read.
|
(e.g., remote actor died), and the buffer is not safe to read.
|
||||||
|
|
||||||
|
|||||||
@ -161,7 +161,7 @@ async def write_local_file(output_path: str,
|
|||||||
batch_outputs: The list of batch outputs to write.
|
batch_outputs: The list of batch outputs to write.
|
||||||
"""
|
"""
|
||||||
# We should make this async, but as long as run_batch runs as a
|
# We should make this async, but as long as run_batch runs as a
|
||||||
# standalone program, blocking the event loop won't effect performance.
|
# standalone program, blocking the event loop won't affect performance.
|
||||||
with open(output_path, "w", encoding="utf-8") as f:
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
for o in batch_outputs:
|
for o in batch_outputs:
|
||||||
print(o.model_dump_json(), file=f)
|
print(o.model_dump_json(), file=f)
|
||||||
|
|||||||
@ -728,7 +728,7 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
for response_msg in request.input:
|
for response_msg in request.input:
|
||||||
messages.append(
|
messages.append(
|
||||||
parse_response_input(response_msg, prev_outputs))
|
parse_response_input(response_msg, prev_outputs))
|
||||||
# User passes in a a tool call request and its output. We need
|
# User passes in a tool call request and its output. We need
|
||||||
# to add the tool call request to prev_outputs so that the
|
# to add the tool call request to prev_outputs so that the
|
||||||
# parse_response_input can find the tool call request when
|
# parse_response_input can find the tool call request when
|
||||||
# parsing the tool call output.
|
# parsing the tool call output.
|
||||||
|
|||||||
@ -223,7 +223,7 @@ def _wait_until_pg_ready(current_placement_group: "PlacementGroup"):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
# Wait until PG is ready - this will block until all
|
# Wait until PG is ready - this will block until all
|
||||||
# requested resources are available, and will timeout
|
# requested resources are available, and will time out
|
||||||
# if they cannot be provisioned.
|
# if they cannot be provisioned.
|
||||||
placement_group_specs = current_placement_group.bundle_specs
|
placement_group_specs = current_placement_group.bundle_specs
|
||||||
|
|
||||||
|
|||||||
@ -116,7 +116,7 @@ def _normalize_quant_group_shape(x: torch.Tensor, group_shape: GroupShape):
|
|||||||
# then we would expand a to:
|
# then we would expand a to:
|
||||||
# a = [[1, 1, 2, 2],
|
# a = [[1, 1, 2, 2],
|
||||||
# [3, 3, 4, 4]]
|
# [3, 3, 4, 4]]
|
||||||
# NOTE this function this function does not explicitly broadcast dimensions
|
# NOTE this function does not explicitly broadcast dimensions
|
||||||
# with an extent of 1, since this can be done implicitly by pytorch
|
# with an extent of 1, since this can be done implicitly by pytorch
|
||||||
def group_broadcast(t, shape):
|
def group_broadcast(t, shape):
|
||||||
for i, s in enumerate(shape):
|
for i, s in enumerate(shape):
|
||||||
|
|||||||
@ -185,7 +185,7 @@ _EMBEDDING_MODELS = {
|
|||||||
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
||||||
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501
|
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501
|
||||||
# Technically PrithviGeoSpatialMAE is a model that works on images, both in
|
# Technically PrithviGeoSpatialMAE is a model that works on images, both in
|
||||||
# input and output. I am adding it here because it piggy-backs on embedding
|
# input and output. I am adding it here because it piggybacks on embedding
|
||||||
# models for the time being.
|
# models for the time being.
|
||||||
"PrithviGeoSpatialMAE": ("prithvi_geospatial_mae", "PrithviGeoSpatialMAE"),
|
"PrithviGeoSpatialMAE": ("prithvi_geospatial_mae", "PrithviGeoSpatialMAE"),
|
||||||
}
|
}
|
||||||
|
|||||||
@ -97,7 +97,7 @@ class SamplingMetadataCache:
|
|||||||
class SamplingMetadata:
|
class SamplingMetadata:
|
||||||
"""Metadata for input sequences. Used in sampler.
|
"""Metadata for input sequences. Used in sampler.
|
||||||
|
|
||||||
The usage is as follow;
|
The usage is as follows;
|
||||||
```
|
```
|
||||||
hidden_states = execute_model(...)
|
hidden_states = execute_model(...)
|
||||||
logits = hidden_states[sampling_metadata.selected_token_indices]
|
logits = hidden_states[sampling_metadata.selected_token_indices]
|
||||||
|
|||||||
@ -269,7 +269,7 @@ class ScalarType:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def uint(cls, size_bits: int, bias: Optional[int]) -> 'ScalarType':
|
def uint(cls, size_bits: int, bias: Optional[int]) -> 'ScalarType':
|
||||||
"""Create a unsigned integer scalar type."""
|
"""Create an unsigned integer scalar type."""
|
||||||
ret = cls(0, size_bits, False, bias if bias else 0)
|
ret = cls(0, size_bits, False, bias if bias else 0)
|
||||||
ret.id # noqa B018: make sure the id is cached
|
ret.id # noqa B018: make sure the id is cached
|
||||||
return ret
|
return ret
|
||||||
|
|||||||
@ -1193,7 +1193,7 @@ class HiddenStates(msgspec.Struct, array_like=True,
|
|||||||
seq_ids are the sequence ids of each entry of the batch
|
seq_ids are the sequence ids of each entry of the batch
|
||||||
dimension of the hidden_states tensor"""
|
dimension of the hidden_states tensor"""
|
||||||
# Scorer hidden states. For prefill step, it is used for hidden states of
|
# Scorer hidden states. For prefill step, it is used for hidden states of
|
||||||
# all tokens, whereas for decode step, it use used for last accepted tokens.
|
# all tokens, whereas for decode step, it is used for last accepted tokens.
|
||||||
hidden_states: torch.Tensor
|
hidden_states: torch.Tensor
|
||||||
# The sequence group metadata list. Only needed for decode step.
|
# The sequence group metadata list. Only needed for decode step.
|
||||||
seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None
|
seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None
|
||||||
|
|||||||
@ -815,7 +815,7 @@ class Scheduler(SchedulerInterface):
|
|||||||
# NOTE: structured_output_request_ids maps
|
# NOTE: structured_output_request_ids maps
|
||||||
# a request's (request that uses structured output)
|
# a request's (request that uses structured output)
|
||||||
# request_id to its index in the batch.
|
# request_id to its index in the batch.
|
||||||
# This will helps us determine to slice the grammar bitmask
|
# This will help us determine to slice the grammar bitmask
|
||||||
# and only apply the valid mask for requests that
|
# and only apply the valid mask for requests that
|
||||||
# use structured decoding.
|
# use structured decoding.
|
||||||
structured_output_request_ids: dict[str, int] = {}
|
structured_output_request_ids: dict[str, int] = {}
|
||||||
@ -923,7 +923,7 @@ class Scheduler(SchedulerInterface):
|
|||||||
request):
|
request):
|
||||||
# NOTE: structured_output_request
|
# NOTE: structured_output_request
|
||||||
# should not be None if use_structured_output, we have
|
# should not be None if use_structured_output, we have
|
||||||
# check above, so safe to ignore type warning
|
# checked above, so safe to ignore type warning
|
||||||
request.structured_output_request.grammar.accept_tokens( # type: ignore[union-attr]
|
request.structured_output_request.grammar.accept_tokens( # type: ignore[union-attr]
|
||||||
req_id, new_token_ids)
|
req_id, new_token_ids)
|
||||||
|
|
||||||
@ -1242,7 +1242,7 @@ class Scheduler(SchedulerInterface):
|
|||||||
finished_sending reqs to the output.
|
finished_sending reqs to the output.
|
||||||
* if finished_sending: free the blocks
|
* if finished_sending: free the blocks
|
||||||
* if finished_recving: add to state so we can
|
* if finished_recving: add to state so we can
|
||||||
scheduler the request during the next step.
|
schedule the request during the next step.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.connector is not None:
|
if self.connector is not None:
|
||||||
|
|||||||
@ -59,7 +59,7 @@ class RequestStateStats:
|
|||||||
|
|
||||||
num_generation_tokens: int = 0
|
num_generation_tokens: int = 0
|
||||||
|
|
||||||
# This is a engine frontend timestamp (wall-clock)
|
# This is an engine frontend timestamp (wall-clock)
|
||||||
arrival_time: float = 0.0
|
arrival_time: float = 0.0
|
||||||
|
|
||||||
# These are engine core timestamps (monotonic)
|
# These are engine core timestamps (monotonic)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user