Commit c0ffb6ed authored by Patrick Labatut, committed by GitHub

Do not force using FlashAttention (#58)

Remove the hardcoded selection of the operator implementation and use the xFormers fMHA dispatcher instead.
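For context, a minimal sketch of the difference at the call site, assuming the xFormers fMHA API (`xformers.ops.memory_efficient_attention` and `xformers.ops.fmha`); the tensor shapes, dtype, and device below are hypothetical, not taken from the commit:

```python
import torch
from xformers.ops import fmha, memory_efficient_attention

# Hypothetical inputs in (batch, seq_len, num_heads, head_dim) layout, which is
# what memory_efficient_attention expects; fp16 on CUDA so FlashAttention is eligible.
q = torch.randn(2, 197, 12, 64, dtype=torch.float16, device="cuda")
k, v = torch.randn_like(q), torch.randn_like(q)

# Before this commit: the FlashAttention operator was pinned explicitly. The call
# then fails outright whenever the inputs fall outside what FlashAttention
# supports (e.g. fp32 tensors), instead of falling back to another kernel.
x = memory_efficient_attention(q, k, v, op=fmha.MemoryEfficientAttentionFlashAttentionOp)

# After this commit: no `op` argument, so the fMHA dispatcher selects a suitable
# backend for these inputs (FlashAttention when applicable, otherwise another
# registered implementation such as the Cutlass-based kernel).
x = memory_efficient_attention(q, k, v)
```

The dispatcher-based call in the last line is what the diff below switches to.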
parent ca58ffcd
```diff
@@ -73,11 +73,7 @@ class MemEffAttention(Attention):
         q, k, v = unbind(qkv, 2)
-        if attn_bias is not None:
-            self_att_op = fmha.MemoryEfficientAttentionFlashAttentionOp
-        else:
-            self_att_op = None
-        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias, op=self_att_op)
+        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
         x = x.reshape([B, N, C])
         x = self.proj(x)
```
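For readers without the rest of the file, here is a self-contained sketch of what the surrounding forward method plausibly looks like after this change. Everything outside the hunk (the joint qkv projection, `num_heads`, the `proj` layer) is inferred from the context lines and the standard ViT attention layout, not taken from the diff:

```python
import torch
from torch import Tensor, nn
from xformers.ops import memory_efficient_attention


class MemEffAttention(nn.Module):
    # Sketch only: per the hunk header, the real class derives from the
    # module's own Attention base class rather than nn.Module directly.

    def __init__(self, dim: int, num_heads: int) -> None:
        super().__init__()
        self.num_heads = num_heads
        self.qkv = nn.Linear(dim, dim * 3)  # joint q/k/v projection (assumed)
        self.proj = nn.Linear(dim, dim)     # output projection (assumed)

    def forward(self, x: Tensor, attn_bias=None) -> Tensor:
        B, N, C = x.shape
        # (B, N, 3, num_heads, head_dim): the layout memory_efficient_attention expects
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
        q, k, v = torch.unbind(qkv, 2)

        # No explicit `op`: the fMHA dispatcher picks a backend for these inputs.
        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
        x = x.reshape([B, N, C])
        x = self.proj(x)
        return x
```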