mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-11 17:27:03 +08:00
Sage Moore fixes for full cuda graph support for DeepEP+DeepGEMM LL
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
parent
26d34eb67e
commit
e53382cc2e
@@ -75,7 +75,8 @@ class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
|
||||
1, # MQA for the decode path
|
||||
)
|
||||
|
||||
if self.runner.full_cuda_graph:
|
||||
n = num_splits.size(0)
|
||||
if self.runner.full_cuda_graph and (n-1) <= self.runner.cudagraph_batch_sizes[-1]:
|
||||
# First time around (CUDAGraph capture), allocate the static buffer
|
||||
if self.cg_buf_tile_scheduler_metadata is None:
|
||||
self.cg_buf_tile_scheduler_metadata = tile_scheduler_metadata
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user