fix: typos (#18151)

Signed-off-by: omahs <73983677+omahs@users.noreply.github.com>
This commit is contained in:
omahs 2025-05-15 11:16:15 +02:00 committed by GitHub
parent a8f5aec20a
commit a9944aabfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 18 additions and 18 deletions

View File

@@ -172,7 +172,7 @@ __device__ void paged_attention_kernel(
// Load the query to registers.
// Each thread in a thread group has a different part of the query.
// For example, if the the thread group size is 4, then the first thread in
// For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the query, and the second thread
// has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because
// q is split from a qkv tensor, it may not be contiguous.
@@ -259,7 +259,7 @@ __device__ void paged_attention_kernel(
// Load a key to registers.
// Each thread in a thread group has a different part of the key.
// For example, if the the thread group size is 4, then the first thread in
// For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the key, and the second thread
// has 1, 5, 9, ... th vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {

View File

@@ -68,7 +68,7 @@ def get_current_weather(city: str, state: str, unit: 'str'):
"partly cloudly, with highs in the 90's.")
tool_funtions = {"get_current_weather": get_current_weather}
tool_functions = {"get_current_weather": get_current_weather}
tools = [{
"type": "function",
@@ -122,7 +122,7 @@ messages.append({
# above defined function
tool_calls = json.loads(output)
tool_answers = [
tool_funtions[call['name']](**call['arguments']) for call in tool_calls
tool_functions[call['name']](**call['arguments']) for call in tool_calls
]
# append the answer as a tool message and let the LLM give you an answer

View File

@@ -30,7 +30,7 @@ def test_load_checkpoints_from_huggingface(lora_fixture_name, request):
lora_path = get_adapter_absolute_path(lora_name)
# lora loading should work for either absolute path and hugggingface id.
# lora loading should work for either absolute path and huggingface id.
peft_helper = PEFTHelper.from_local_dir(lora_path, 4096)
lora_model = LoRAModel.from_local_checkpoint(
lora_path,

View File

@@ -20,11 +20,11 @@ def test_hf_transfer_auto_activation():
try:
# enable hf hub transfer if available
import hf_transfer # type: ignore # noqa
HF_TRANFER_ACTIVE = True
HF_TRANSFER_ACTIVE = True
except ImportError:
HF_TRANFER_ACTIVE = False
HF_TRANSFER_ACTIVE = False
assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER ==
HF_TRANFER_ACTIVE)
HF_TRANSFER_ACTIVE)
def test_download_weights_from_hf():

View File

@@ -297,7 +297,7 @@ class ModelConfig:
- 1K -> 1024\n
- 25.6k -> 25,600"""
spec_target_max_model_len: Optional[int] = None
"""Specify the the maximum length for spec decoding draft models."""
"""Specify the maximum length for spec decoding draft models."""
quantization: Optional[QuantizationMethods] = None
"""Method used to quantize the weights. If `None`, we first check the
`quantization_config` attribute in the model config file. If that is

View File

@@ -153,7 +153,7 @@ def _lora_expand(
lora_token_start_loc (torch.Tensor): A cumulative sum of
num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that
lora_token_start_loc[i], along with num_tokens_per_lora[i]
identifies the the region in token_indices_sorted_by_lora_ids that
identifies the region in token_indices_sorted_by_lora_ids that
LoRA lora_ids[i] should process.
lora_ids (torch.Tensor): LoRA ids to process.
no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates

View File

@@ -142,7 +142,7 @@ def mamba_v2_sharded_weight_loader(
) -> LoaderFunction:
"""Create a weight loader for mamba v2. This ensures that the projections
are correctly sharded so that they can be split into x, B, C. It also
ensures the the all the groups corresponding to a head shard is placed
ensures that all the groups corresponding to a head shard is placed
together with it.
"""

View File

@@ -21,7 +21,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Inference-only IBM Granite speeech model."""
"""Inference-only IBM Granite speech model."""
import math
from collections.abc import Iterable, Mapping
from typing import Optional, TypedDict, Union
@@ -626,7 +626,7 @@ class GraniteSpeechForConditionalGeneration(
audio_embed_sizes: torch.Tensor,
) -> torch.Tensor:
"""Calculate the input features mask, which will generally be used
to mask the the padded features for all entries in the batch except
to mask the padded features for all entries in the batch except
for those with the most audio features.
Args:

View File

@@ -91,9 +91,9 @@ class ConformerEncoderLayer(nn.Module):
if set to True, use GLULinear module,
otherwise, used GLUPointWiseConv module.
default to False.
attention_innner_dim: int, optional
attention_inner_dim: int, optional
if equal to -1, attention dim for linears k/q/v is
equal to d_model. otherwise attention_innner_dim is used.
equal to d_model. otherwise attention_inner_dim is used.
default -1.
attention_glu_type: str, optional
activation function for glu used in the multihead attention,
@@ -148,7 +148,7 @@ class ConformerEncoderLayer(nn.Module):
conv_glu_type="sigmoid",
bias_in_glu=True,
linear_glu_in_convm=False,
attention_innner_dim=-1,
attention_inner_dim=-1,
attention_glu_type="swish",
activation_checkpointing="",
export=False,
@@ -169,7 +169,7 @@ class ConformerEncoderLayer(nn.Module):
n_head,
d_model,
dropout_rate,
attention_innner_dim,
attention_inner_dim,
attention_glu_type,
bias_in_glu,
use_pt_scaled_dot_product_attention=

View File

@@ -72,7 +72,7 @@ class Request:
assert len(self.mm_inputs) == len(self.mm_hashes)
# Read-only views
# Prevent directly appending to the these lists since
# Prevent directly appending to these lists since
# they should also be updated simultaneously.
self.output_token_ids = ConstantList(self._output_token_ids)
self.all_token_ids = ConstantList(self._all_token_ids)