mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-21 14:55:45 +08:00
[Doc]: fix various typos in multiple files (#23179)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
parent
b6d7d34fc6
commit
22cf679aad
@ -18,7 +18,7 @@ class BeamSearchSequence:
|
|||||||
The text field is optional and will only be filled when the sequence is
|
The text field is optional and will only be filled when the sequence is
|
||||||
about to be returned to the user.
|
about to be returned to the user.
|
||||||
"""
|
"""
|
||||||
# The tokens includes the prompt.
|
# The tokens include the prompt.
|
||||||
tokens: list[int]
|
tokens: list[int]
|
||||||
logprobs: list[dict[int, Logprob]]
|
logprobs: list[dict[int, Logprob]]
|
||||||
lora_request: Optional[LoRARequest] = None
|
lora_request: Optional[LoRARequest] = None
|
||||||
|
|||||||
@ -484,7 +484,7 @@ class VllmBackend:
|
|||||||
|
|
||||||
factors = []
|
factors = []
|
||||||
# 0. factors come from the env; for example, the values of
|
# 0. factors come from the env; for example, the values of
|
||||||
# VLLM_PP_LAYER_PARTITION will affects the computation graph.
|
# VLLM_PP_LAYER_PARTITION will affect the computation graph.
|
||||||
env_hash = envs.compute_hash()
|
env_hash = envs.compute_hash()
|
||||||
factors.append(env_hash)
|
factors.append(env_hash)
|
||||||
|
|
||||||
|
|||||||
@ -605,7 +605,7 @@ class EngineArgs:
|
|||||||
**guided_decoding_kwargs["disable_additional_properties"])
|
**guided_decoding_kwargs["disable_additional_properties"])
|
||||||
guided_decoding_group.add_argument(
|
guided_decoding_group.add_argument(
|
||||||
"--reasoning-parser",
|
"--reasoning-parser",
|
||||||
# This choices is a special case because it's not static
|
# This choice is a special case because it's not static
|
||||||
choices=list(ReasoningParserManager.reasoning_parsers),
|
choices=list(ReasoningParserManager.reasoning_parsers),
|
||||||
**guided_decoding_kwargs["reasoning_backend"])
|
**guided_decoding_kwargs["reasoning_backend"])
|
||||||
|
|
||||||
@ -1047,7 +1047,7 @@ class EngineArgs:
|
|||||||
# details from the config directly
|
# details from the config directly
|
||||||
# no user input required / expected
|
# no user input required / expected
|
||||||
if isinstance(hf_config, SpeculatorsConfig):
|
if isinstance(hf_config, SpeculatorsConfig):
|
||||||
# We create one since we dont create one
|
# We create one since we don't create one
|
||||||
self.speculative_config = {}
|
self.speculative_config = {}
|
||||||
self.speculative_config[
|
self.speculative_config[
|
||||||
"num_speculative_tokens"] = hf_config.num_lookahead_tokens
|
"num_speculative_tokens"] = hf_config.num_lookahead_tokens
|
||||||
@ -1775,7 +1775,7 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
def add_cli_args(parser: FlexibleArgumentParser,
|
def add_cli_args(parser: FlexibleArgumentParser,
|
||||||
async_args_only: bool = False) -> FlexibleArgumentParser:
|
async_args_only: bool = False) -> FlexibleArgumentParser:
|
||||||
# Initialize plugin to update the parser; for example, the plugin may
|
# Initialize plugin to update the parser; for example, the plugin may
|
||||||
# adding a new kind of quantization method to --quantization argument or
|
# add a new kind of quantization method to --quantization argument or
|
||||||
# a new device to --device argument.
|
# a new device to --device argument.
|
||||||
load_general_plugins()
|
load_general_plugins()
|
||||||
if not async_args_only:
|
if not async_args_only:
|
||||||
|
|||||||
@ -539,7 +539,7 @@ class MQLLMEngineClient(EngineClient):
|
|||||||
if request_id in self.output_queues:
|
if request_id in self.output_queues:
|
||||||
raise ValueError(f"Request {request_id} already exists")
|
raise ValueError(f"Request {request_id} already exists")
|
||||||
|
|
||||||
# 1) Create output queue for this requests.
|
# 1) Create output queue for this request.
|
||||||
queue: asyncio.Queue[Union[RequestOutput,
|
queue: asyncio.Queue[Union[RequestOutput,
|
||||||
BaseException]] = asyncio.Queue()
|
BaseException]] = asyncio.Queue()
|
||||||
self.output_queues[request_id] = queue
|
self.output_queues[request_id] = queue
|
||||||
@ -651,7 +651,7 @@ class MQLLMEngineClient(EngineClient):
|
|||||||
# Uses the same I/O as generate requests
|
# Uses the same I/O as generate requests
|
||||||
request = RPCLoadAdapterRequest(lora_request)
|
request = RPCLoadAdapterRequest(lora_request)
|
||||||
|
|
||||||
# Create output queue for this requests.
|
# Create output queue for this request.
|
||||||
queue: asyncio.Queue[Union[None, BaseException]] = asyncio.Queue()
|
queue: asyncio.Queue[Union[None, BaseException]] = asyncio.Queue()
|
||||||
self.output_queues[request.request_id] = queue
|
self.output_queues[request.request_id] = queue
|
||||||
|
|
||||||
|
|||||||
@ -1330,7 +1330,7 @@ def apply_mistral_chat_template(
|
|||||||
# mistral-common uses assert statements to stop processing of input
|
# mistral-common uses assert statements to stop processing of input
|
||||||
# if input does not comply with the expected format.
|
# if input does not comply with the expected format.
|
||||||
# We convert those assertion errors to ValueErrors so they can be
|
# We convert those assertion errors to ValueErrors so they can be
|
||||||
# are properly caught in the preprocessing_input step
|
# properly caught in the preprocessing_input step
|
||||||
except (AssertionError, MistralCommonException) as e:
|
except (AssertionError, MistralCommonException) as e:
|
||||||
raise ValueError(str(e)) from e
|
raise ValueError(str(e)) from e
|
||||||
|
|
||||||
|
|||||||
@ -2482,7 +2482,7 @@ class PlaceholderModule(_PlaceholderBase):
|
|||||||
A placeholder object to use when a module does not exist.
|
A placeholder object to use when a module does not exist.
|
||||||
|
|
||||||
This enables more informative errors when trying to access attributes
|
This enables more informative errors when trying to access attributes
|
||||||
of a module that does not exists.
|
of a module that does not exist.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, name: str) -> None:
|
def __init__(self, name: str) -> None:
|
||||||
@ -3109,7 +3109,7 @@ class LazyLoader(types.ModuleType):
|
|||||||
"""
|
"""
|
||||||
LazyLoader module borrowed from Tensorflow
|
LazyLoader module borrowed from Tensorflow
|
||||||
https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
|
https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
|
||||||
with a addition of "module caching".
|
with an addition of "module caching".
|
||||||
|
|
||||||
Lazily import a module, mainly to avoid pulling in large dependencies.
|
Lazily import a module, mainly to avoid pulling in large dependencies.
|
||||||
Modules such as `xgrammar` might have additional side effects, so we
|
Modules such as `xgrammar` might have additional side effects, so we
|
||||||
|
|||||||
@ -267,7 +267,7 @@ class StructuredOutputManager:
|
|||||||
assert request.structured_output_request is not None
|
assert request.structured_output_request is not None
|
||||||
assert request.structured_output_request.grammar is not None
|
assert request.structured_output_request.grammar is not None
|
||||||
# by default, we should always advance
|
# by default, we should always advance
|
||||||
# for cases that doesn't uses thinking mode.
|
# for cases that don't use thinking mode.
|
||||||
if self.reasoner is not None:
|
if self.reasoner is not None:
|
||||||
structured_req = request.structured_output_request
|
structured_req = request.structured_output_request
|
||||||
|
|
||||||
@ -276,7 +276,7 @@ class StructuredOutputManager:
|
|||||||
|
|
||||||
# Check if reasoning ends in *this* step
|
# Check if reasoning ends in *this* step
|
||||||
if self.reasoner.is_reasoning_end(request.all_token_ids):
|
if self.reasoner.is_reasoning_end(request.all_token_ids):
|
||||||
# Reasoning just ended, so we shouldn't advanced until
|
# Reasoning just ended, so we shouldn't advance until
|
||||||
# next pass
|
# next pass
|
||||||
structured_req.reasoning_ended = True
|
structured_req.reasoning_ended = True
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user