def attention(Q, K, V):
    """Compute scaled dot-product attention.

    Evaluates ``softmax(Q @ K^T / sqrt(d_k)) @ V``, where ``d_k`` is the size
    of the last axis of ``K`` and the softmax is taken over the last axis of
    the score matrix.

    Args:
        Q: Query tensor. Assumed to be floating point and shaped
            ``(..., q_len, d_k)`` -- TODO confirm against callers.
        K: Key tensor. Assumed to be shaped ``(..., k_len, d_k)`` -- TODO
            confirm against callers. Its last two axes are transposed
            before being multiplied with ``Q``.
        V: Value tensor. Assumed to be shaped ``(..., k_len, d_v)`` -- TODO
            confirm against callers.

    Returns:
        The result of multiplying the softmax-normalized scores with ``V``.
    """
    scores = tf.matmul(Q, K, transpose_b=True)

    # tf.math.sqrt does not accept integer inputs, so the key depth has to be
    # cast to the scores' floating dtype before taking the square root.
    # tf.shape is used so the depth is read from the actual tensor rather
    # than from a static shape entry that may be unknown.
    dk = tf.cast(tf.shape(K)[-1], scores.dtype)
    scaled_scores = scores / tf.math.sqrt(dk)

    attention_weights = tf.nn.softmax(scaled_scores, axis=-1)
    return tf.matmul(attention_weights, V)
This user hasn't posted anything yet.