mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-26 16:01:21 +08:00
[Doc]: fix typos in Python comments (#24026)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
parent
dc1a53186d
commit
107284959a
@ -23,7 +23,7 @@ def create_test_prompts(
|
|||||||
2 requests for base model, 4 requests for the LoRA. We define 2
|
2 requests for base model, 4 requests for the LoRA. We define 2
|
||||||
different LoRA adapters (using the same model for demo purposes).
|
different LoRA adapters (using the same model for demo purposes).
|
||||||
Since we also set `max_loras=1`, the expectation is that the requests
|
Since we also set `max_loras=1`, the expectation is that the requests
|
||||||
with the second LoRA adapter will be ran after all requests with the
|
with the second LoRA adapter will be run after all requests with the
|
||||||
first adapter have finished.
|
first adapter have finished.
|
||||||
"""
|
"""
|
||||||
return [
|
return [
|
||||||
|
|||||||
@ -31,7 +31,7 @@ class PyNcclCommunicator:
|
|||||||
group: the process group to work on. If None, it will use the
|
group: the process group to work on. If None, it will use the
|
||||||
default process group.
|
default process group.
|
||||||
device: the device to bind the PyNcclCommunicator to. If None,
|
device: the device to bind the PyNcclCommunicator to. If None,
|
||||||
it will be bind to f"cuda:{local_rank}".
|
it will be bound to f"cuda:{local_rank}".
|
||||||
library_path: the path to the NCCL library. If None, it will
|
library_path: the path to the NCCL library. If None, it will
|
||||||
use the default library path.
|
use the default library path.
|
||||||
It is the caller's responsibility to make sure each communicator
|
It is the caller's responsibility to make sure each communicator
|
||||||
|
|||||||
@ -939,8 +939,8 @@ def get_pipeline_model_parallel_group():
|
|||||||
def graph_capture(device: torch.device):
|
def graph_capture(device: torch.device):
|
||||||
"""
|
"""
|
||||||
`graph_capture` is a context manager which should surround the code that
|
`graph_capture` is a context manager which should surround the code that
|
||||||
is capturing the CUDA graph. Its main purpose is to ensure that the
|
is capturing the CUDA graph. Its main purpose is to ensure that some
|
||||||
some operations will be run after the graph is captured, before the graph
|
operations will be run after the graph is captured, before the graph
|
||||||
is replayed. It returns a `GraphCaptureContext` object which contains the
|
is replayed. It returns a `GraphCaptureContext` object which contains the
|
||||||
necessary data for the graph capture. Currently, it only contains the
|
necessary data for the graph capture. Currently, it only contains the
|
||||||
stream that the graph capture is running on. This stream is set to the
|
stream that the graph capture is running on. This stream is set to the
|
||||||
|
|||||||
@ -165,7 +165,7 @@ class PythonicToolParser(ToolParser):
|
|||||||
index] += delta.function.arguments
|
index] += delta.function.arguments
|
||||||
|
|
||||||
# HACK: serving_chat.py inspects the internal state of tool parsers
|
# HACK: serving_chat.py inspects the internal state of tool parsers
|
||||||
# when determining it's final streaming delta, automatically
|
# when determining its final streaming delta, automatically
|
||||||
# adding autocompleted JSON.
|
# adding autocompleted JSON.
|
||||||
# These two lines avoid that nonsense while ensuring finish_reason
|
# These two lines avoid that nonsense while ensuring finish_reason
|
||||||
# is set to tool_calls when at least one tool is called.
|
# is set to tool_calls when at least one tool is called.
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import torch.nn.functional as F
|
|||||||
|
|
||||||
def _histogram(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
|
def _histogram(input: torch.Tensor, min: int, max: int) -> torch.Tensor:
|
||||||
"""
|
"""
|
||||||
Compute the histogram of a int32 tensor. The bin edges are defined by the
|
Compute the histogram of an int32 tensor. The bin edges are defined by the
|
||||||
min and max values, with step = 1.
|
min and max values, with step = 1.
|
||||||
"""
|
"""
|
||||||
assert input.dtype == torch.int32, "input must be of torch.int32 dtype."
|
assert input.dtype == torch.int32, "input must be of torch.int32 dtype."
|
||||||
|
|||||||
@ -544,7 +544,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
|
|||||||
vision_embeddings)
|
vision_embeddings)
|
||||||
input_ids = None
|
input_ids = None
|
||||||
|
|
||||||
# up until here we have a inputs_embeds 100% numerical identity
|
# up until here we have an inputs_embeds 100% numerical identity
|
||||||
# between the OG HF Transformers implementation and ours
|
# between the OG HF Transformers implementation and ours
|
||||||
hidden_states = self.llm(
|
hidden_states = self.llm(
|
||||||
input_ids=input_ids,
|
input_ids=input_ids,
|
||||||
|
|||||||
@ -43,7 +43,7 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
if set different to 0, the number of
|
if set different to 0, the number of
|
||||||
depthwise_seperable_out_channel will be used as a
|
depthwise_seperable_out_channel will be used as a
|
||||||
channel_out of the second conv1d layer.
|
channel_out of the second conv1d layer.
|
||||||
otherwise, it equal to 0, the second conv1d layer is skipped.
|
otherwise, if it equals 0, the second conv1d layer is skipped.
|
||||||
depthwise_multiplier: int
|
depthwise_multiplier: int
|
||||||
number of input_dim channels duplication. this value
|
number of input_dim channels duplication. this value
|
||||||
will be used to compute the hidden channels of the Conv1D.
|
will be used to compute the hidden channels of the Conv1D.
|
||||||
@ -115,7 +115,7 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
we recalculate activation in backward.
|
we recalculate activation in backward.
|
||||||
default "".
|
default "".
|
||||||
export: bool, optional
|
export: bool, optional
|
||||||
if set to True, it remove the padding from convolutional layers
|
if set to True, it removes the padding from convolutional layers
|
||||||
and allow the onnx conversion for inference.
|
and allow the onnx conversion for inference.
|
||||||
default False.
|
default False.
|
||||||
use_pt_scaled_dot_product_attention: bool, optional
|
use_pt_scaled_dot_product_attention: bool, optional
|
||||||
@ -686,7 +686,7 @@ class ConformerEncoder(TransformerEncoderBase):
|
|||||||
only work for glu_in_attention !=0
|
only work for glu_in_attention !=0
|
||||||
default "swish".
|
default "swish".
|
||||||
export: bool, optional
|
export: bool, optional
|
||||||
if set to True, it remove the padding from convolutional layers
|
if set to True, it removes the padding from convolutional layers
|
||||||
and allow the onnx conversion for inference.
|
and allow the onnx conversion for inference.
|
||||||
default False.
|
default False.
|
||||||
activation_checkpointing: str, optional
|
activation_checkpointing: str, optional
|
||||||
|
|||||||
@ -258,7 +258,7 @@ class DepthWiseSeperableConv1d(nn.Module):
|
|||||||
if set different to 0, the number of
|
if set different to 0, the number of
|
||||||
depthwise_seperable_out_channel will be used as a channel_out
|
depthwise_seperable_out_channel will be used as a channel_out
|
||||||
of the second conv1d layer.
|
of the second conv1d layer.
|
||||||
otherwise, it equal to 0, the second conv1d layer is skipped.
|
otherwise, if it equals 0, the second conv1d layer is skipped.
|
||||||
kernel_size: int
|
kernel_size: int
|
||||||
kernel_size
|
kernel_size
|
||||||
depthwise_multiplier: int
|
depthwise_multiplier: int
|
||||||
|
|||||||
2
vllm/third_party/pynvml.py
vendored
2
vllm/third_party/pynvml.py
vendored
@ -1022,7 +1022,7 @@ def _extractNVMLErrorsAsClasses():
|
|||||||
Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate
|
Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate
|
||||||
exceptions more easily.
|
exceptions more easily.
|
||||||
|
|
||||||
NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass.
|
NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass.
|
||||||
e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
|
e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
|
||||||
'''
|
'''
|
||||||
this_module = sys.modules[__name__]
|
this_module = sys.modules[__name__]
|
||||||
|
|||||||
@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
|
|||||||
class NemotronConfig(PretrainedConfig):
|
class NemotronConfig(PretrainedConfig):
|
||||||
r"""
|
r"""
|
||||||
This is the configuration class to store the configuration of a
|
This is the configuration class to store the configuration of a
|
||||||
[`NemotronModel`]. It is used to instantiate an Nemotron model
|
[`NemotronModel`]. It is used to instantiate a Nemotron model
|
||||||
according to the specified arguments, defining the model architecture.
|
according to the specified arguments, defining the model architecture.
|
||||||
Instantiating a configuration with the defaults will yield a similar
|
Instantiating a configuration with the defaults will yield a similar
|
||||||
configuration to that of the Nemotron-8B.
|
configuration to that of the Nemotron-8B.
|
||||||
|
|||||||
@ -38,7 +38,7 @@ class NemotronHConfig(PretrainedConfig):
|
|||||||
passed when calling [`NemotronHModel`]
|
passed when calling [`NemotronHModel`]
|
||||||
tie_word_embeddings (`bool`, *optional*, defaults to `False`):
|
tie_word_embeddings (`bool`, *optional*, defaults to `False`):
|
||||||
Whether the model's input and output word embeddings should be
|
Whether the model's input and output word embeddings should be
|
||||||
tied. Note that this is only relevant if the model has a output
|
tied. Note that this is only relevant if the model has an output
|
||||||
word embedding layer.
|
word embedding layer.
|
||||||
hidden_size (`int`, *optional*, defaults to 4096):
|
hidden_size (`int`, *optional*, defaults to 4096):
|
||||||
Dimension of the hidden representations.
|
Dimension of the hidden representations.
|
||||||
|
|||||||
@ -55,7 +55,7 @@ class OvisProcessorKwargs(ProcessingKwargs, total=False): # type: ignore[call-
|
|||||||
|
|
||||||
class OvisProcessor(ProcessorMixin):
|
class OvisProcessor(ProcessorMixin):
|
||||||
r"""
|
r"""
|
||||||
Constructs a Ovis processor which wraps a Ovis image processor and a Qwen2 tokenizer into a single processor.
|
Constructs an Ovis processor which wraps an Ovis image processor and a Qwen2 tokenizer into a single processor.
|
||||||
[`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
|
[`OvisProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
|
||||||
[`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`] for more information.
|
[`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`] for more information.
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
@ -41,7 +41,7 @@ class Ovis2_5ProcessorKwargs(ProcessingKwargs,
|
|||||||
|
|
||||||
class Ovis2_5Processor(ProcessorMixin):
|
class Ovis2_5Processor(ProcessorMixin):
|
||||||
r"""
|
r"""
|
||||||
Constructs a Ovis processor which wraps a Ovis image processor
|
Constructs an Ovis processor which wraps an Ovis image processor
|
||||||
and a Qwen2 tokenizer into a single processor.
|
and a Qwen2 tokenizer into a single processor.
|
||||||
[`OvisProcessor`] offers all the functionalities of
|
[`OvisProcessor`] offers all the functionalities of
|
||||||
[`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`].
|
[`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`].
|
||||||
|
|||||||
@ -107,7 +107,7 @@ def _find_longest_matched_ngram_and_propose_tokens(
|
|||||||
longest_ngram = 0
|
longest_ngram = 0
|
||||||
position = 0
|
position = 0
|
||||||
|
|
||||||
# lps[0] always equal to 0, we starts with index 1
|
# lps[0] is always equal to 0, so we start with index 1
|
||||||
prev_lps = 0
|
prev_lps = 0
|
||||||
i = 1
|
i = 1
|
||||||
while i < total_token:
|
while i < total_token:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user