mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-11 10:27:57 +08:00
[Bugfix] change FlashMLA reorder_batch_threshold (#27777)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent
55011aef24
commit
145c00a4d3
@@ -71,7 +71,7 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
|
|||||||
class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
|
class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
|
||||||
cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
|
cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
|
||||||
query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
|
query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
|
||||||
reorder_batch_threshold: int = 512 # process small prefills with decode pathway
|
reorder_batch_threshold: int = 128 # process small prefills with decode pathway
|
||||||
# ^ TODO(matt): tune this
|
# ^ TODO(matt): tune this
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user