mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 01:05:01 +08:00
[Core] Cache vllm_is_batch_invariant (#28304)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
parent
f31419ed8b
commit
ac0bb2c307
@ -4,6 +4,7 @@ import contextlib
|
|||||||
import os
|
import os
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
from functools import cache
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@ -857,6 +858,7 @@ def get_batch_invariant_attention_block_size() -> AttentionBlockSize:
|
|||||||
return AttentionBlockSize(block_m=16, block_n=16)
|
return AttentionBlockSize(block_m=16, block_n=16)
|
||||||
|
|
||||||
|
|
||||||
|
@cache
|
||||||
def vllm_is_batch_invariant():
|
def vllm_is_batch_invariant():
|
||||||
env_key = "VLLM_BATCH_INVARIANT"
|
env_key = "VLLM_BATCH_INVARIANT"
|
||||||
is_overridden = False
|
is_overridden = False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user