mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:35:00 +08:00
[Perf] Improve/Fix-regression for FA3 in High QPS regimes (#19463)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
parent
981eeca41a
commit
a045b7e89a
@ -38,7 +38,7 @@ else()
|
|||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
vllm-flash-attn
|
vllm-flash-attn
|
||||||
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
|
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
|
||||||
GIT_TAG 763ad155a1c826f71ff318f41edb1e4e5e376ddb
|
GIT_TAG 2c6bcfc0feb3d9d4a57b243fc159a68aa9933f5b
|
||||||
GIT_PROGRESS TRUE
|
GIT_PROGRESS TRUE
|
||||||
# Don't share the vllm-flash-attn build between build types
|
# Don't share the vllm-flash-attn build between build types
|
||||||
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
|
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
|
||||||
|
|||||||
1
test-qwen
Submodule
1
test-qwen
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 34c31c0af8fc975140b8c85548fefa1eb7f523e4
|
||||||
Loading…
x
Reference in New Issue
Block a user