mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 17:24:30 +08:00
[Doc]: fixing doc typos (#24635)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
parent
41329a0ff9
commit
e2b1f863aa
@ -3881,7 +3881,7 @@ def contains_object_print(text):
|
|||||||
Check if the text looks like a printed Python object, e.g.
|
Check if the text looks like a printed Python object, e.g.
|
||||||
contains any substring matching the pattern: "at 0xFFFFFFF>"
|
contains any substring matching the pattern: "at 0xFFFFFFF>"
|
||||||
We match against 0x followed by 2-16 hex chars (there's
|
We match against 0x followed by 2-16 hex chars (there's
|
||||||
a max of 16 on a 64 bit system).
|
a max of 16 on a 64-bit system).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text (str): The text to check
|
text (str): The text to check
|
||||||
|
|||||||
@ -60,7 +60,7 @@ class Internlm2ToolParser(ToolParser):
|
|||||||
if '<|action_start|>' not in current_text:
|
if '<|action_start|>' not in current_text:
|
||||||
self.position = len(current_text)
|
self.position = len(current_text)
|
||||||
return DeltaMessage(content=delta_text)
|
return DeltaMessage(content=delta_text)
|
||||||
# if the tool call is sended, return an empty delta message
|
# if the tool call is sent, return an empty delta message
|
||||||
# to make sure the finish_reason will be sent correctly.
|
# to make sure the finish_reason will be sent correctly.
|
||||||
if self.current_tool_id > 0:
|
if self.current_tool_id > 0:
|
||||||
return DeltaMessage(content='')
|
return DeltaMessage(content='')
|
||||||
|
|||||||
@ -502,7 +502,7 @@ def _chunk_state_varlen_kernel(
|
|||||||
dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize
|
dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize
|
||||||
|
|
||||||
# If the sequence starts after the last chunk idx, we don't need to add the contribution from the last chunk
|
# If the sequence starts after the last chunk idx, we don't need to add the contribution from the last chunk
|
||||||
# If HAS_INITSTATES==True need to consider two possiblties
|
# If HAS_INITSTATES==True need to consider two possibilities
|
||||||
# - if start_idx < pid_c * chunk_size, then we need to take the past_states_ptrs
|
# - if start_idx < pid_c * chunk_size, then we need to take the past_states_ptrs
|
||||||
# - if start_idx >= pid_c * chunk_size, then we need to insert initstates
|
# - if start_idx >= pid_c * chunk_size, then we need to insert initstates
|
||||||
if ((start_idx < pid_c * chunk_size) # first chunk
|
if ((start_idx < pid_c * chunk_size) # first chunk
|
||||||
|
|||||||
@ -342,7 +342,7 @@ class ArceeModel(nn.Module):
|
|||||||
class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
||||||
"""Arcee Model for causal language modeling, integrated with vLLM
|
"""Arcee Model for causal language modeling, integrated with vLLM
|
||||||
runtime."""
|
runtime."""
|
||||||
# Map fused module names to their sub-module components
|
# Map fused module names to their submodule components
|
||||||
# (for quantization and LoRA)
|
# (for quantization and LoRA)
|
||||||
packed_modules_mapping = {
|
packed_modules_mapping = {
|
||||||
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
||||||
|
|||||||
@ -835,7 +835,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# The result multimodal_embeddings is a tuple of tensors, with each
|
# The result multimodal_embeddings is a tuple of tensors, with each
|
||||||
# tensor correspoending to a multimodal data item (image or video).
|
# tensor corresponding to a multimodal data item (image or video).
|
||||||
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
||||||
|
|
||||||
# NOTE: It is important to iterate over the keys in this dictionary
|
# NOTE: It is important to iterate over the keys in this dictionary
|
||||||
|
|||||||
@ -1350,7 +1350,7 @@ class Phi4MultimodalForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# The result multimodal_embeddings is a tuple of tensors, with each
|
# The result multimodal_embeddings is a tuple of tensors, with each
|
||||||
# tensor correspoending to a multimodal data item (image or video).
|
# tensor corresponding to a multimodal data item (image or video).
|
||||||
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
||||||
|
|
||||||
# NOTE: It is important to iterate over the keys in this dictionary
|
# NOTE: It is important to iterate over the keys in this dictionary
|
||||||
|
|||||||
@ -100,7 +100,7 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
activation function for glu used in the multihead attention,
|
activation function for glu used in the multihead attention,
|
||||||
default "swish".
|
default "swish".
|
||||||
activation_checkpointing: str, optional
|
activation_checkpointing: str, optional
|
||||||
a dictionarry of {"module","interval","offload"}, where
|
a dictionary of {"module","interval","offload"}, where
|
||||||
"module": str
|
"module": str
|
||||||
accept ["transformer", "attention"] to select
|
accept ["transformer", "attention"] to select
|
||||||
which module should do activation checkpointing.
|
which module should do activation checkpointing.
|
||||||
|
|||||||
@ -846,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
# The result multimodal_embeddings is a tuple of tensors, with each
|
# The result multimodal_embeddings is a tuple of tensors, with each
|
||||||
# tensor correspoending to a multimodal data item (image or video).
|
# tensor corresponding to a multimodal data item (image or video).
|
||||||
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
||||||
|
|
||||||
# NOTE: It is important to iterate over the keys in this dictionary
|
# NOTE: It is important to iterate over the keys in this dictionary
|
||||||
@ -873,7 +873,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
|||||||
if multimodal_embeddings is not None \
|
if multimodal_embeddings is not None \
|
||||||
and len(multimodal_embeddings) != 0:
|
and len(multimodal_embeddings) != 0:
|
||||||
|
|
||||||
# TODO (ywang96): support overlapping modalitiy embeddings so that
|
# TODO (ywang96): support overlapping modality embeddings so that
|
||||||
# `use_audio_in_video` will work on V1.
|
# `use_audio_in_video` will work on V1.
|
||||||
inputs_embeds = merge_multimodal_embeddings(
|
inputs_embeds = merge_multimodal_embeddings(
|
||||||
input_ids, inputs_embeds, multimodal_embeddings, [
|
input_ids, inputs_embeds, multimodal_embeddings, [
|
||||||
|
|||||||
@ -463,7 +463,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
|
|||||||
self.dcp_world_size = 1
|
self.dcp_world_size = 1
|
||||||
self.dcp_rank = 0
|
self.dcp_rank = 0
|
||||||
|
|
||||||
# Dont try to access the runner on AMD
|
# Don't try to access the runner on AMD
|
||||||
if self.aot_schedule:
|
if self.aot_schedule:
|
||||||
self.page_size = self.kv_cache_spec.block_size
|
self.page_size = self.kv_cache_spec.block_size
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user