fix: typos (#18151)

Signed-off-by: omahs <73983677+omahs@users.noreply.github.com>
This commit is contained in:
omahs 2025-05-15 11:16:15 +02:00 committed by GitHub
parent a8f5aec20a
commit a9944aabfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 18 additions and 18 deletions

View File

@ -172,7 +172,7 @@ __device__ void paged_attention_kernel(
// Load the query to registers. // Load the query to registers.
// Each thread in a thread group has a different part of the query. // Each thread in a thread group has a different part of the query.
// For example, if the the thread group size is 4, then the first thread in // For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the query, and the second thread // the group has 0, 4, 8, ... th vectors of the query, and the second thread
// has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because // has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because
// q is split from a qkv tensor, it may not be contiguous. // q is split from a qkv tensor, it may not be contiguous.
@ -259,7 +259,7 @@ __device__ void paged_attention_kernel(
// Load a key to registers. // Load a key to registers.
// Each thread in a thread group has a different part of the key. // Each thread in a thread group has a different part of the key.
// For example, if the the thread group size is 4, then the first thread in // For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the key, and the second thread // the group has 0, 4, 8, ... th vectors of the key, and the second thread
// has 1, 5, 9, ... th vectors of the key, and so on. // has 1, 5, 9, ... th vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) { for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {

View File

@ -68,7 +68,7 @@ def get_current_weather(city: str, state: str, unit: 'str'):
"partly cloudly, with highs in the 90's.") "partly cloudly, with highs in the 90's.")
tool_funtions = {"get_current_weather": get_current_weather} tool_functions = {"get_current_weather": get_current_weather}
tools = [{ tools = [{
"type": "function", "type": "function",
@ -122,7 +122,7 @@ messages.append({
# above defined function # above defined function
tool_calls = json.loads(output) tool_calls = json.loads(output)
tool_answers = [ tool_answers = [
tool_funtions[call['name']](**call['arguments']) for call in tool_calls tool_functions[call['name']](**call['arguments']) for call in tool_calls
] ]
# append the answer as a tool message and let the LLM give you an answer # append the answer as a tool message and let the LLM give you an answer

View File

@ -30,7 +30,7 @@ def test_load_checkpoints_from_huggingface(lora_fixture_name, request):
lora_path = get_adapter_absolute_path(lora_name) lora_path = get_adapter_absolute_path(lora_name)
# lora loading should work for either absolute path and hugggingface id. # lora loading should work for either absolute path and huggingface id.
peft_helper = PEFTHelper.from_local_dir(lora_path, 4096) peft_helper = PEFTHelper.from_local_dir(lora_path, 4096)
lora_model = LoRAModel.from_local_checkpoint( lora_model = LoRAModel.from_local_checkpoint(
lora_path, lora_path,

View File

@ -20,11 +20,11 @@ def test_hf_transfer_auto_activation():
try: try:
# enable hf hub transfer if available # enable hf hub transfer if available
import hf_transfer # type: ignore # noqa import hf_transfer # type: ignore # noqa
HF_TRANFER_ACTIVE = True HF_TRANSFER_ACTIVE = True
except ImportError: except ImportError:
HF_TRANFER_ACTIVE = False HF_TRANSFER_ACTIVE = False
assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER ==
HF_TRANFER_ACTIVE) HF_TRANSFER_ACTIVE)
def test_download_weights_from_hf(): def test_download_weights_from_hf():

View File

@ -297,7 +297,7 @@ class ModelConfig:
- 1K -> 1024\n - 1K -> 1024\n
- 25.6k -> 25,600""" - 25.6k -> 25,600"""
spec_target_max_model_len: Optional[int] = None spec_target_max_model_len: Optional[int] = None
"""Specify the the maximum length for spec decoding draft models.""" """Specify the maximum length for spec decoding draft models."""
quantization: Optional[QuantizationMethods] = None quantization: Optional[QuantizationMethods] = None
"""Method used to quantize the weights. If `None`, we first check the """Method used to quantize the weights. If `None`, we first check the
`quantization_config` attribute in the model config file. If that is `quantization_config` attribute in the model config file. If that is

View File

@ -153,7 +153,7 @@ def _lora_expand(
lora_token_start_loc (torch.Tensor): A cumulative sum of lora_token_start_loc (torch.Tensor): A cumulative sum of
num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that
lora_token_start_loc[i], along with num_tokens_per_lora[i] lora_token_start_loc[i], along with num_tokens_per_lora[i]
identifies the the region in token_indices_sorted_by_lora_ids that identifies the region in token_indices_sorted_by_lora_ids that
LoRA lora_ids[i] should process. LoRA lora_ids[i] should process.
lora_ids (torch.Tensor): LoRA ids to process. lora_ids (torch.Tensor): LoRA ids to process.
no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates

View File

@ -142,7 +142,7 @@ def mamba_v2_sharded_weight_loader(
) -> LoaderFunction: ) -> LoaderFunction:
"""Create a weight loader for mamba v2. This ensures that the projections """Create a weight loader for mamba v2. This ensures that the projections
are correctly sharded so that they can be split into x, B, C. It also are correctly sharded so that they can be split into x, B, C. It also
ensures the the all the groups corresponding to a head shard is placed ensures that all the groups corresponding to a head shard is placed
together with it. together with it.
""" """

View File

@ -21,7 +21,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Inference-only IBM Granite speeech model.""" """Inference-only IBM Granite speech model."""
import math import math
from collections.abc import Iterable, Mapping from collections.abc import Iterable, Mapping
from typing import Optional, TypedDict, Union from typing import Optional, TypedDict, Union
@ -626,7 +626,7 @@ class GraniteSpeechForConditionalGeneration(
audio_embed_sizes: torch.Tensor, audio_embed_sizes: torch.Tensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""Calculate the input features mask, which will generally be used """Calculate the input features mask, which will generally be used
to mask the the padded features for all entries in the batch except to mask the padded features for all entries in the batch except
for those with the most audio features. for those with the most audio features.
Args: Args:

View File

@ -91,9 +91,9 @@ class ConformerEncoderLayer(nn.Module):
if set to True, use GLULinear module, if set to True, use GLULinear module,
otherwise, used GLUPointWiseConv module. otherwise, used GLUPointWiseConv module.
default to False. default to False.
attention_innner_dim: int, optional attention_inner_dim: int, optional
if equal to -1, attention dim for linears k/q/v is if equal to -1, attention dim for linears k/q/v is
equal to d_model. otherwise attention_innner_dim is used. equal to d_model. otherwise attention_inner_dim is used.
default -1. default -1.
attention_glu_type: str, optional attention_glu_type: str, optional
activation function for glu used in the multihead attention, activation function for glu used in the multihead attention,
@ -148,7 +148,7 @@ class ConformerEncoderLayer(nn.Module):
conv_glu_type="sigmoid", conv_glu_type="sigmoid",
bias_in_glu=True, bias_in_glu=True,
linear_glu_in_convm=False, linear_glu_in_convm=False,
attention_innner_dim=-1, attention_inner_dim=-1,
attention_glu_type="swish", attention_glu_type="swish",
activation_checkpointing="", activation_checkpointing="",
export=False, export=False,
@ -169,7 +169,7 @@ class ConformerEncoderLayer(nn.Module):
n_head, n_head,
d_model, d_model,
dropout_rate, dropout_rate,
attention_innner_dim, attention_inner_dim,
attention_glu_type, attention_glu_type,
bias_in_glu, bias_in_glu,
use_pt_scaled_dot_product_attention= use_pt_scaled_dot_product_attention=

View File

@ -72,7 +72,7 @@ class Request:
assert len(self.mm_inputs) == len(self.mm_hashes) assert len(self.mm_inputs) == len(self.mm_hashes)
# Read-only views # Read-only views
# Prevent directly appending to the these lists since # Prevent directly appending to these lists since
# they should also be updated simultaneously. # they should also be updated simultaneously.
self.output_token_ids = ConstantList(self._output_token_ids) self.output_token_ids = ConstantList(self._output_token_ids)
self.all_token_ids = ConstantList(self._all_token_ids) self.all_token_ids = ConstantList(self._all_token_ids)