mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 20:35:01 +08:00
allow disable flashinfer prefill (#25276)
Signed-off-by: Lu Fang <fanglu@fb.com>
This commit is contained in:
parent
431535b522
commit
ee7a66dd9a
@ -32,6 +32,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
|
VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
|
||||||
VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
|
VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
|
||||||
VLLM_NO_USAGE_STATS: bool = False
|
VLLM_NO_USAGE_STATS: bool = False
|
||||||
|
VLLM_DISABLE_FLASHINFER_PREFILL: bool = False
|
||||||
VLLM_DO_NOT_TRACK: bool = False
|
VLLM_DO_NOT_TRACK: bool = False
|
||||||
VLLM_USAGE_SOURCE: str = ""
|
VLLM_USAGE_SOURCE: str = ""
|
||||||
VLLM_CONFIGURE_LOGGING: int = 1
|
VLLM_CONFIGURE_LOGGING: int = 1
|
||||||
@ -479,6 +480,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
lambda: os.environ.get("VLLM_USAGE_STATS_SERVER", "https://stats.vllm.ai"),
|
lambda: os.environ.get("VLLM_USAGE_STATS_SERVER", "https://stats.vllm.ai"),
|
||||||
"VLLM_NO_USAGE_STATS":
|
"VLLM_NO_USAGE_STATS":
|
||||||
lambda: os.environ.get("VLLM_NO_USAGE_STATS", "0") == "1",
|
lambda: os.environ.get("VLLM_NO_USAGE_STATS", "0") == "1",
|
||||||
|
"VLLM_DISABLE_FLASHINFER_PREFILL":
|
||||||
|
lambda: os.environ.get("VLLM_DISABLE_FLASHINFER_PREFILL", "0") == "1",
|
||||||
"VLLM_DO_NOT_TRACK":
|
"VLLM_DO_NOT_TRACK":
|
||||||
lambda: (os.environ.get("VLLM_DO_NOT_TRACK", None) or os.environ.get(
|
lambda: (os.environ.get("VLLM_DO_NOT_TRACK", None) or os.environ.get(
|
||||||
"DO_NOT_TRACK", None) or "0") == "1",
|
"DO_NOT_TRACK", None) or "0") == "1",
|
||||||
|
|||||||
@ -412,7 +412,8 @@ M = TypeVar("M", bound=MLACommonMetadata)
|
|||||||
def use_flashinfer_prefill() -> bool:
|
def use_flashinfer_prefill() -> bool:
|
||||||
# For blackwell default to flashinfer prefill if it's available since
|
# For blackwell default to flashinfer prefill if it's available since
|
||||||
# it is faster than FA2.
|
# it is faster than FA2.
|
||||||
return (flashinfer_available and not envs.VLLM_USE_CUDNN_PREFILL
|
return (not envs.VLLM_DISABLE_FLASHINFER_PREFILL and flashinfer_available
|
||||||
|
and not envs.VLLM_USE_CUDNN_PREFILL
|
||||||
and current_platform.is_device_capability(100))
|
and current_platform.is_device_capability(100))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user