[Docs] Fix warnings in vllm/profiler and vllm/transformers_utils (#25220)
Signed-off-by: windsonsea <haifeng.yao@daocloud.io>
parent bef180f009
commit 367a480bd3
@@ -102,6 +102,7 @@ plugins:
             - https://numpy.org/doc/stable/objects.inv
             - https://pytorch.org/docs/stable/objects.inv
             - https://psutil.readthedocs.io/en/stable/objects.inv
+            - https://huggingface.co/docs/transformers/main/en/objects.inv
 
 markdown_extensions:
   - attr_list
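For context, the URLs above are Sphinx objects.inv inventories that mkdocstrings uses to resolve cross-references to external projects. The sketch below is a hypothetical docstring, not part of this commit, showing the kind of transformers reference that the newly added inventory lets the docs build link instead of warning about; the function name is made up for illustration.

from transformers import PretrainedConfig


def load_hf_config(model_name: str) -> PretrainedConfig:
    """Load the Hugging Face config for a model.

    Returns:
        transformers.PretrainedConfig: the parsed configuration. With the
        transformers inventory registered, mkdocstrings can turn this type
        reference into a link rather than emitting an unresolved-reference
        warning during the docs build.
    """
    return PretrainedConfig.from_pretrained(model_name)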
@@ -353,8 +353,8 @@ class layerwise_profile(profile):
 
         Args:
             num_running_seqs (Optional[int], optional): When given,
-                num_running_seqs will be passed to LayerProfileResults for metadata
-                update. Defaults to None.
+                num_running_seqs will be passed to LayerProfileResults
+                for metadata update. Defaults to None.
         """
         super().__init__(
             activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
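A minimal usage sketch of the docstring being reworded here, assuming layerwise_profile behaves like a standard torch.profiler context manager and that the import path shown is where it is exported; the stand-in workload is made up for illustration.

import torch

from vllm.profiler import layerwise_profile  # assumed import path


def run_one_decode_step() -> None:
    # Stand-in workload; a real caller would run a model forward pass here.
    torch.matmul(torch.randn(64, 64), torch.randn(64, 64))


# Per the docstring above, num_running_seqs is forwarded to
# LayerProfileResults as metadata when the profile is collected.
with layerwise_profile(num_running_seqs=8):
    run_one_decode_step()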
@@ -74,8 +74,7 @@ class JAISConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values
             attentions (not used by all models).
-        scale_attn_by_inverse_layer_idx
-            (`bool`, *optional*, defaults to `False`):
+        scale_attn_by_inverse_layer_idx (`bool`, *optional*, default `True`):
             Whether to additionally scale attention weights
             by `1 / layer_idx + 1`.
         reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
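To make the flag concrete, here is a small sketch of the GPT-2-style scaling the docstring describes, where attention scores in layer layer_idx are additionally divided by (layer_idx + 1); the function name and tensor shapes are illustrative, not vLLM's or JAIS's actual implementation.

import torch


def scale_attn_scores(
    scores: torch.Tensor,
    layer_idx: int,
    scale_attn_by_inverse_layer_idx: bool = True,
) -> torch.Tensor:
    # Illustrative only: with the flag set, layer `layer_idx` scales its
    # attention scores by 1 / (layer_idx + 1) before the softmax.
    if scale_attn_by_inverse_layer_idx:
        scores = scores / float(layer_idx + 1)
    return scores


scores = torch.randn(1, 4, 4)  # (batch, query_len, key_len), made-up shape
scaled = scale_attn_scores(scores, layer_idx=3)  # values scaled by 1/4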