[Doc]: fix typos in various files (#24798)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
Didier Durand 2025-09-13 09:43:33 +02:00 committed by GitHub
parent 4dad72f0d9
commit 41ae4a1eab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 13 additions and 13 deletions

View File

@@ -42,7 +42,7 @@ def main():
llm_args["model"] = "meta-llama/Llama-3.1-8B-Instruct" llm_args["model"] = "meta-llama/Llama-3.1-8B-Instruct"
# Set `enforce_eager=True` to avoid ahead-of-time compilation. # Set `enforce_eager=True` to avoid ahead-of-time compilation.
# In real workloads, `enforace_eager` should be `False`. # In real workloads, `enforce_eager` should be `False`.
llm = LLM(**llm_args) llm = LLM(**llm_args)
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
print("-" * 50) print("-" * 50)

View File

@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
# Increment refcount for each block. # Increment refcount for each block.
assert block.block_id is not None assert block.block_id is not None
refcount = self._refcounter.incr(block.block_id) refcount = self._refcounter.incr(block.block_id)
assert refcount != 1, "can't fork free'd block" assert refcount != 1, "can't fork freed block"
forked_block = self._block_pool.init_block( forked_block = self._block_pool.init_block(
prev_block=prev_block, prev_block=prev_block,

View File

@@ -58,7 +58,7 @@ class Evictor(ABC):
class BlockMetaData: class BlockMetaData:
"""Data structure for storing key data describe cached block, so that """Data structure for storing key data describe cached block, so that
evitor could use to make its decision which one to choose for eviction evictor could use to make its decision which one to choose for eviction
Here we use physical block id as the dict key, as there maybe several Here we use physical block id as the dict key, as there maybe several
blocks with the same content hash, but their physical id is unique. blocks with the same content hash, but their physical id is unique.

View File

@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
if local_interval_elapsed(stats.now, self.last_local_log, if local_interval_elapsed(stats.now, self.last_local_log,
self.local_interval): self.local_interval):
# Compute summary metrics for tracked stats (and log them # Compute summary metrics for tracked stats (and log them
# to promethus if applicable). # to prometheus if applicable).
prompt_throughput = get_throughput(self.num_prompt_tokens, prompt_throughput = get_throughput(self.num_prompt_tokens,
now=stats.now, now=stats.now,
last_log=self.last_local_log) last_log=self.last_local_log)
@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):
class PrometheusStatLogger(StatLoggerBase): class PrometheusStatLogger(StatLoggerBase):
"""PrometheusStatLogger is used LLMEngine to log to Promethus.""" """PrometheusStatLogger is used LLMEngine to log to Prometheus."""
_metrics_cls = Metrics _metrics_cls = Metrics
_gauge_cls = prometheus_client.Gauge _gauge_cls = prometheus_client.Gauge

View File

@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
""" """
Handle special case for models where MLP layers are already Handle special case for models where MLP layers are already
fused on disk. In this case, we have no shard id. This function fused on disk. In this case, we have no shard id. This function
determmines the shard id by splitting these layers and then calls determines the shard id by splitting these layers and then calls
the weight loader using the shard id. the weight loader using the shard id.
An example of a model with these fused layers: An example of a model with these fused layers:
@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
""" """
Handle special case for models where QKV layers are already Handle special case for models where QKV layers are already
fused on disk. In this case, we have no shard id. This function fused on disk. In this case, we have no shard id. This function
determmines the shard id by splitting these layers and then calls determines the shard id by splitting these layers and then calls
the weight loader using the shard id. the weight loader using the shard id.
An example of a model with these fused layers: An example of a model with these fused layers:

View File

@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):
if params_dtype is None: if params_dtype is None:
params_dtype = torch.get_default_dtype() params_dtype = torch.get_default_dtype()
# Divide the weight matrix along the vocaburaly dimension. # Divide the weight matrix along the vocabulary dimension.
self.num_added_embeddings = self.num_embeddings - self.org_vocab_size self.num_added_embeddings = self.num_embeddings - self.org_vocab_size
self.num_embeddings_per_partition = divide(self.num_embeddings_padded, self.num_embeddings_per_partition = divide(self.num_embeddings_padded,
self.tp_size) self.tp_size)

View File

@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
return None return None
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary

View File

@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,
# ruff: noqa # ruff: noqa
# The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the # The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
# text to account for this. However, the audio preprocessing and encoder do not gurarantee they will # text to account for this. However, the audio preprocessing and encoder do not guarantee they will
# produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens # produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
# depending on the length of the longest audio input in the batch. When we encounter this situation, we pad # depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
# the audio feature out to 188 soft tokens with the emebedding of the last token in the embed_audio vocab. # the audio feature out to 188 soft tokens with the embedding of the last token in the embed_audio vocab.
# TODO precompute and cache padding # TODO precompute and cache padding
audio_padding_toks = torch.tensor([[self.vocab_size - 1]], audio_padding_toks = torch.tensor([[self.vocab_size - 1]],
dtype=torch.long, dtype=torch.long,

View File

@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
return [] return []
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image). # tensor corresponding to a multimodal data item (image).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary

View File

@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
return None return None
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary