mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-28 13:27:15 +08:00
[BugFix][CPU] Fix TorchSDPABackendImpl doesn't have use_irope (#21200)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
dcc6cfb991
commit
468e2400fe
@@ -2668,7 +2668,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
||||
# TODO: Support other attention modules, e.g., cross-attention
|
||||
if attn_module.attn_type == AttentionType.DECODER:
|
||||
use_local_attention = (self.attention_chunk_size is not None
|
||||
and attn_module.impl.use_irope)
|
||||
and getattr(attn_module.impl,
|
||||
"use_irope", False))
|
||||
if attn_module.sliding_window is not None:
|
||||
kv_cache_spec[layer_name] = SlidingWindowSpec(
|
||||
block_size=block_size,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user