mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 03:15:00 +08:00
[Doc]: fix typos in various files (#29230)
Signed-off-by: Didier Durand <durand.didier@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
8005e606bf
commit
eca7a8fb59
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
This directory includes benchmarks comparing DeepSeek's DeepGEMM block fp8 kernels against vLLM's existing Triton and CUTLASS-based kernels.
|
This directory includes benchmarks comparing DeepSeek's DeepGEMM block fp8 kernels against vLLM's existing Triton and CUTLASS-based kernels.
|
||||||
|
|
||||||
Currently this just includes dense GEMMs and only works on Hopper GPUs.
|
Currently, this just includes dense GEMMs and only works on Hopper GPUs.
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ class VllmConfig:
|
|||||||
"""`torch.compile` and cudagraph capture configuration for the model.
|
"""`torch.compile` and cudagraph capture configuration for the model.
|
||||||
|
|
||||||
As a shorthand, one can append compilation arguments via
|
As a shorthand, one can append compilation arguments via
|
||||||
-0.parameter=arguement such as `-O.mode=3` (same as `-O='{"mode":3}'`).
|
-O.parameter=argument such as `-O.mode=3` (same as `-O='{"mode":3}'`).
|
||||||
|
|
||||||
You can specify the full compilation config like so:
|
You can specify the full compilation config like so:
|
||||||
`{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8]}`
|
`{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8]}`
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ class DPMetadata:
|
|||||||
@contextmanager
|
@contextmanager
|
||||||
def sp_local_sizes(self, sequence_parallel_size: int):
|
def sp_local_sizes(self, sequence_parallel_size: int):
|
||||||
"""
|
"""
|
||||||
Context mamager for setting self.local_sizes. Same as self.chunked_sizes
|
Context manager for setting self.local_sizes. Same as self.chunked_sizes
|
||||||
but without any chunking.
|
but without any chunking.
|
||||||
"""
|
"""
|
||||||
self.local_sizes = _compute_sp_num_tokens(
|
self.local_sizes = _compute_sp_num_tokens(
|
||||||
|
|||||||
@@ -525,7 +525,7 @@ class InputBatch:
|
|||||||
# NOTE: the following is unsafe
|
# NOTE: the following is unsafe
|
||||||
# self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\
|
# self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\
|
||||||
# self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...]
|
# self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...]
|
||||||
# instead, we need to temporiarily copy the data for one of the indices
|
# instead, we need to temporarily copy the data for one of the indices
|
||||||
# TODO(lucas): optimize this by only copying valid indices
|
# TODO(lucas): optimize this by only copying valid indices
|
||||||
tmp = self.token_ids_cpu[i1, ...].copy()
|
tmp = self.token_ids_cpu[i1, ...].copy()
|
||||||
self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...]
|
self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user