[Misc][Docs] Raise error when flashinfer is not installed and VLLM_ATTENTION_BACKEND is set (#12513)
Signed-off-by: NickLucche <nlucches@redhat.com>
parent ccc00515fd
commit 444b0f0f62
@ -184,3 +184,13 @@ chat_response = client.chat.completions.create(
)
print("Chat response:", chat_response)
```

## On Attention Backends

Currently, vLLM supports multiple backends for efficient attention computation across different platforms and accelerator architectures. It automatically selects the most performant backend compatible with your system and model specifications.

If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`.

```{attention}
There are no pre-built vllm wheels containing FlashInfer, so you must install it in your environment first. Refer to the [FlashInfer official docs](https://docs.flashinfer.ai/) or see the [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) for instructions on how to install it.
```
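As an aside, the docs change above can be exercised with a short script: a minimal sketch of forcing the backend through `VLLM_ATTENTION_BACKEND` before constructing an engine. The model name, prompt, and sampling settings below are placeholders, not part of this commit; if `FLASHINFER` is requested but the `flashinfer` package is absent, the config check added in this commit raises a `ValueError` up front instead of failing later.

```python
import os

# The environment variable must be set before vLLM resolves the attention
# backend, so export it ahead of constructing the engine.
os.environ["VLLM_ATTENTION_BACKEND"] = "FLASHINFER"

from vllm import LLM, SamplingParams

# Placeholder model and prompt; any supported model works here.
llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```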
@ -9,6 +9,7 @@ import sys
import warnings
from contextlib import contextmanager
from dataclasses import dataclass, field, replace
from importlib.util import find_spec
from pathlib import Path
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Counter, Dict,
                    Final, List, Literal, Mapping, Optional, Protocol, Set,
@ -294,6 +295,14 @@ class ModelConfig:
        self.maybe_pull_model_tokenizer_for_s3(model, tokenizer)

        if (backend := envs.VLLM_ATTENTION_BACKEND
            ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
            raise ValueError(
                "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
                "module was not found. "
                "See https://github.com/vllm-project/vllm/blob/main/Dockerfile "
                "for instructions on how to install it.")

        # The tokenizer version is consistent with the model version by default.
        if tokenizer_revision is None:
            self.tokenizer_revision = revision
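For context, the guard above relies on the standard library's `importlib.util.find_spec`, which returns `None` when a module cannot be located, without importing it. A minimal standalone sketch of the same logic, with the environment lookup done through `os.environ` rather than vLLM's `envs` module, might look like this (not part of the commit):

```python
import os
from importlib.util import find_spec

# Standalone version of the check: find_spec returns None if the module
# cannot be located, so flashinfer is never actually imported here.
backend = os.environ.get("VLLM_ATTENTION_BACKEND")
if backend == "FLASHINFER" and find_spec("flashinfer") is None:
    raise ValueError(
        "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
        "module was not found. "
        "See https://github.com/vllm-project/vllm/blob/main/Dockerfile "
        "for instructions on how to install it.")
```

Using `find_spec` keeps the check cheap, since flashinfer itself is never imported just to detect whether it is installed.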