def attention(Q, K, V):
    """Compute scaled dot-product attention.

    Calculates ``softmax(Q @ K^T / sqrt(d_k)) @ V``, where ``d_k`` is the
    size of the last axis of ``K``.

    Args:
        Q: Query tensor. Its last axis must match the last axis of ``K``
            for the ``Q @ K^T`` product to be defined.
        K: Key tensor; the size of its last axis is used as ``d_k``.
        V: Value tensor, combined using the attention weights.
            NOTE(review): presumably laid out as (..., seq_len, depth) like
            Q and K -- confirm against callers.

    Returns:
        The attention-weighted combination of ``V``.
    """
    scores = tf.matmul(Q, K, transpose_b=True)

    # tf.math.sqrt only accepts floating-point (or complex) inputs, so the
    # key depth must be cast before taking the root; passing the raw integer
    # dimension raises an error. tf.shape is used instead of K.shape so the
    # depth is still available when the static dimension is unknown.
    dk = tf.cast(tf.shape(K)[-1], scores.dtype)
    scaled_scores = scores / tf.math.sqrt(dk)

    # Normalize over the last axis so the weights for each query sum to 1.
    attention_weights = tf.nn.softmax(scaled_scores, axis=-1)
    return tf.matmul(attention_weights, V)
		
	

This user hasn't liked anything yet.