From c0ffb6ed716896143b65b38cc958e73284b92cea Mon Sep 17 00:00:00 2001
From: Patrick Labatut <60359573+patricklabatut@users.noreply.github.com>
Date: Wed, 26 Apr 2023 02:26:24 +0200
Subject: [PATCH] Do not force using FlashAttention (#58)

Remove hardcoded selection of operator implementation and use xFormers
fMHA dispatcher instead.
---
 dinov2/layers/attention.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/dinov2/layers/attention.py b/dinov2/layers/attention.py
index c789ebd..1f9b0c9 100644
--- a/dinov2/layers/attention.py
+++ b/dinov2/layers/attention.py
@@ -73,11 +73,7 @@ class MemEffAttention(Attention):
 
         q, k, v = unbind(qkv, 2)
 
-        if attn_bias is not None:
-            self_att_op = fmha.MemoryEfficientAttentionFlashAttentionOp
-        else:
-            self_att_op = None
-        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias, op=self_att_op)
+        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
 
         x = x.reshape([B, N, C])
         x = self.proj(x)
--
GitLab
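
Note (not part of the original patch): before this change, the code forced the xFormers FlashAttention operator whenever an attention bias was supplied; after it, `memory_efficient_attention` is called without an explicit `op`, so the xFormers fMHA dispatcher picks a supported backend for the given inputs and hardware. The sketch below illustrates the two call styles; the tensor shapes, dtype, and CUDA device are arbitrary assumptions, and the forced variant additionally requires a FlashAttention-compatible setup.

```python
# Illustrative sketch only (not part of the patch). Assumes torch and xformers
# are installed and a CUDA GPU is available; shapes and dtype are arbitrary.
import torch
from xformers.ops import memory_efficient_attention, fmha

B, N, H, D = 2, 197, 6, 64  # batch, tokens, heads, head dim (arbitrary)
q = torch.randn(B, N, H, D, device="cuda", dtype=torch.half)
k = torch.randn(B, N, H, D, device="cuda", dtype=torch.half)
v = torch.randn(B, N, H, D, device="cuda", dtype=torch.half)

# Before the patch: the FlashAttention operator was hardcoded whenever an
# attention bias was passed, which fails on inputs or hardware that the
# FlashAttention kernels do not support.
out_forced = memory_efficient_attention(
    q, k, v, op=fmha.MemoryEfficientAttentionFlashAttentionOp
)

# After the patch: no `op` is given, so the xFormers fMHA dispatcher selects a
# suitable backend (FlashAttention when applicable, otherwise another kernel).
out_dispatched = memory_efficient_attention(q, k, v)
```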