mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 02:17:03 +08:00
Merge branch 'main' into woosuk/input-prep
This commit is contained in:
commit
633f9f006d
@ -363,8 +363,10 @@ class Qwen2_5_VisionAttention(nn.Module):
|
||||
q, k, v = (rearrange(x, "s b ... -> b s ...").contiguous()
|
||||
for x in (q, k, v))
|
||||
if rotary_pos_emb is not None:
|
||||
q = apply_rotary_pos_emb_vision(q, rotary_pos_emb)
|
||||
k = apply_rotary_pos_emb_vision(k, rotary_pos_emb)
|
||||
# [2 * b, s, heads, head_dim]
|
||||
qk_concat = torch.cat([q, k], dim=0)
|
||||
qk_rotated = apply_rotary_pos_emb_vision(qk_concat, rotary_pos_emb)
|
||||
q, k = torch.chunk(qk_rotated, 2, dim=0)
|
||||
|
||||
if self.is_flash_attn_backend:
|
||||
if self.attn_backend == _Backend.ROCM_AITER_FA:
|
||||
@ -388,8 +390,8 @@ class Qwen2_5_VisionAttention(nn.Module):
|
||||
causal=False)
|
||||
|
||||
context_layer = rearrange(output,
|
||||
"(b s) ... -> b s ...",
|
||||
b=batch_size)
|
||||
"(b s) h d -> s b (h d)",
|
||||
b=batch_size).contiguous()
|
||||
elif self.attn_backend == _Backend.TORCH_SDPA:
|
||||
# Execute attention entry by entry for speed & less VRAM.
|
||||
outputs = []
|
||||
@ -408,6 +410,8 @@ class Qwen2_5_VisionAttention(nn.Module):
|
||||
output_i = rearrange(output_i, "b h s d -> b s h d ")
|
||||
outputs.append(output_i)
|
||||
context_layer = torch.cat(outputs, dim=1)
|
||||
context_layer = rearrange(context_layer,
|
||||
"b s h d -> s b (h d)").contiguous()
|
||||
elif self.attn_backend == _Backend.XFORMERS:
|
||||
from xformers import ops as xops
|
||||
from xformers.ops.fmha.attn_bias import BlockDiagonalMask
|
||||
@ -418,8 +422,8 @@ class Qwen2_5_VisionAttention(nn.Module):
|
||||
|
||||
context_layer = xops.memory_efficient_attention_forward(
|
||||
q, k, v, attn_bias=attn_bias, p=0, scale=None)
|
||||
context_layer = rearrange(context_layer,
|
||||
"b s h d -> s b (h d)").contiguous()
|
||||
context_layer = rearrange(context_layer,
|
||||
"b s h d -> s b (h d)").contiguous()
|
||||
|
||||
output, _ = self.proj(context_layer)
|
||||
return output
|
||||
|
||||
@ -2,10 +2,13 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import fnmatch
|
||||
from typing import Any, Optional
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from vllm.utils import PlaceholderModule
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from botocore.client import BaseClient
|
||||
|
||||
try:
|
||||
import boto3
|
||||
except ImportError:
|
||||
@ -26,7 +29,7 @@ def _filter_ignore(paths: list[str], patterns: list[str]) -> list[str]:
|
||||
]
|
||||
|
||||
|
||||
def glob(s3: Optional[Any] = None,
|
||||
def glob(s3: Optional["BaseClient"] = None,
|
||||
path: str = "",
|
||||
allow_pattern: Optional[list[str]] = None) -> list[str]:
|
||||
"""
|
||||
@ -51,7 +54,7 @@ def glob(s3: Optional[Any] = None,
|
||||
|
||||
|
||||
def list_files(
|
||||
s3: Any,
|
||||
s3: "BaseClient",
|
||||
path: str,
|
||||
allow_pattern: Optional[list[str]] = None,
|
||||
ignore_pattern: Optional[list[str]] = None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user