mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 23:25:44 +08:00
[ROCm][AITER][Bugfix] Switch AITER to use PIECEWISE_AND_FULL compilation (#25104)
Signed-off-by: Rohan138 <rohanpotdar138@gmail.com>
This commit is contained in:
parent
dc34059360
commit
bbdc0f2366
@ -232,7 +232,7 @@ class AiterFlashAttentionMetadata:
|
|||||||
|
|
||||||
class AiterFlashAttentionMetadataBuilder(
|
class AiterFlashAttentionMetadataBuilder(
|
||||||
AttentionMetadataBuilder[AiterFlashAttentionMetadata]):
|
AttentionMetadataBuilder[AiterFlashAttentionMetadata]):
|
||||||
cudagraph_support = AttentionCGSupport.ALWAYS
|
cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
|
||||||
|
|
||||||
def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
|
def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
|
||||||
vllm_config: VllmConfig, device: torch.device):
|
vllm_config: VllmConfig, device: torch.device):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user