Commit

remove einsum usage from create_alibi_bias function in AbstractAttention (#781)

Co-authored-by: Bryce Meyer <bryce13950@gmail.com>
Co-authored-by: Fabian Degen <fabian.degen@mytum.de>
3 people authored Nov 25, 2024
1 parent b7c4dbd commit 623407f
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions transformer_lens/components/abstract_attention.py
@@ -692,7 +692,11 @@ def create_alibi_bias(
             n_heads, device
         )
 
-        # The ALiBi bias is then m * slope_matrix
-        alibi_bias = torch.einsum("ij,k->kij", slope, multipliers)
+        # Add singleton dimensions to make shapes compatible for broadcasting:
+        slope = einops.rearrange(slope, "query key -> 1 query key")
+        multipliers = einops.rearrange(multipliers, "head_idx -> head_idx 1 1")
+
+        # Element-wise multiplication of the slope and multipliers
+        alibi_bias = multipliers * slope
 
         return alibi_bias
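Since the commit replaces a torch.einsum call with a broadcasted element-wise product, it is worth checking that the two formulations agree. The snippet below is a minimal sketch, not part of the commit: the tensor sizes are hypothetical stand-ins for the (query, key) slope matrix and the (n_heads,) multiplier vector that create_alibi_bias actually uses.

import torch
import einops

# Hypothetical stand-in sizes: (query_pos, key_pos) slope matrix and
# (n_heads,) vector of per-head ALiBi multipliers m.
slope = torch.randn(5, 5)
multipliers = torch.randn(12)

# Old formulation: einsum over the head and position indices.
old_bias = torch.einsum("ij,k->kij", slope, multipliers)

# New formulation: insert singleton axes, then broadcast-multiply.
slope_b = einops.rearrange(slope, "query key -> 1 query key")
multipliers_b = einops.rearrange(multipliers, "head_idx -> head_idx 1 1")
new_bias = multipliers_b * slope_b

# Both produce the same (n_heads, query, key) bias tensor.
assert torch.allclose(old_bias, new_bias)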
