# Accuracy issue when computing gradients

Hi, I encountered an accuracy issue when computing the backprop of some layers through TensorFlow ops. The gradients were computed in two different ways:

1. compute the gradients automatically through `tf.gradients`;
2. compute the gradients directly through TF APIs; taking softmax as an example, we can compute the gradients as follows:
``````
sum_channels = math_ops.reduce_sum(grad_softmax * softmax, -1, keepdims=True)
grad = (grad_softmax - sum_channels) * softmax
``````
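
For reference, here is a quick NumPy check (independent of TF; the variable names are mine) that this formula matches the explicit softmax Jacobian, so the two ways should agree mathematically:

``````
import numpy as np

np.random.seed(0)
x = np.random.rand(5)
g = np.random.rand(5)  # upstream gradient

s = np.exp(x) / np.exp(x).sum()  # softmax

# the formula above: (g - sum(g * s)) * s
manual = (g - np.sum(g * s)) * s

# explicit Jacobian: J[i, j] = s[i] * (delta_ij - s[j])
J = np.diag(s) - np.outer(s, s)
explicit = J.T @ g  # J is symmetric here, so J @ g gives the same result

print(np.allclose(manual, explicit))  # True
``````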

But I found that the results from the two implementations are not exactly the same. Does anyone know what the problem is?
And another question: when training in TensorFlow, are the applied gradients the same as those computed through `tf.gradients`? Thanks.
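
To make that second question concrete, here is the kind of comparison I mean (a minimal sketch; in TF 1.x, `Optimizer.compute_gradients` returns the gradients an optimizer would apply):

``````
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(4, 4), dtype=tf.float32)
w = tf.Variable(tf.ones([4, 4]))
loss = tf.reduce_sum(tf.nn.softmax(x * w))

# gradients from tf.gradients vs. the ones an optimizer computes
g_direct = tf.gradients(loss, [w])[0]
g_opt = tf.train.GradientDescentOptimizer(0.1).compute_gradients(loss, var_list=[w])[0][0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    a, b = sess.run([g_direct, g_opt])
    print(np.array_equal(a, b))  # are they bit-identical?
``````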

The complete test code is as follows (tested with TF 1.15):

``````
import numpy as np
import tensorflow as tf

batch_size = 20
num_heads = 1  # assumption: num_heads was defined elsewhere; 1 keeps the shapes consistent
from_seq_len = 50
to_seq_len = 50

class testSoftmaxBackprop:
    def __init__(self, batch_size, num_heads, from_seq_len, to_seq_len):
        self.batch_size = batch_size
        self.num_heads = num_heads
        self.from_seq_len = from_seq_len
        self.to_seq_len = to_seq_len

        self.input_data = tf.placeholder(tf.float32, shape=[
            self.num_heads * self.batch_size,
            self.from_seq_len,
            self.to_seq_len
        ])

        self.out = tf.nn.softmax(self.input_data)
        # self.out = tf.identity(softmax)

    def forward(self, np_data):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            out = sess.run([self.out],
                           feed_dict={self.input_data: np_data})
        return out[0]

    def backprop_auto(self, np_data, np_grads):
        # way 1: gradients through TF's automatic differentiation
        grads = tf.placeholder(tf.float32, shape=[
            self.num_heads * self.batch_size,
            self.from_seq_len,
            self.to_seq_len
        ])
        g = tf.gradients(self.out, self.input_data, grad_ys=grads)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            g_out = sess.run(g,
                             feed_dict={
                                 self.input_data: np_data,
                                 grads: np_grads
                             })
        return g_out[0]

    def backprop_api(self, np_softmax, np_grads):
        # way 2: gradients computed manually with the softmax backprop formula
        # tf_data = tf.constant(np_data, dtype=tf.float32)
        tf_grads = tf.constant(np_grads, dtype=tf.float32)
        tf_softmax = tf.constant(np_softmax, dtype=tf.float32)

        sum_channels = tf.reduce_sum(tf_grads * tf_softmax, axis=-1, keepdims=True)
        d_out = (tf_grads - sum_channels) * tf_softmax  # [h*N, T_q, T_k]

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            return sess.run(d_out)

def main():
    np.random.seed(0)
    np_data = np.random.rand(num_heads * batch_size, from_seq_len, to_seq_len)
    np_data = np_data.astype(np.float32)
    np_grads = np.random.rand(num_heads * batch_size, from_seq_len, to_seq_len)
    np_grads = np_grads.astype(np.float32)

    test_back = testSoftmaxBackprop(batch_size, num_heads, from_seq_len, to_seq_len)

    np_softmax = test_back.forward(np_data)
    auto_data = test_back.backprop_auto(np_data, np_grads)
    api_data = test_back.backprop_api(np_softmax, np_grads)

    api_save = api_data.reshape(-1)
    auto_save = auto_data.reshape(-1)
    np.savetxt("api_data.txt", api_save)
    np.savetxt("auto_data.txt", auto_save)

    print("Results:")
    print("Comparison: " + str(np.allclose(api_data, auto_data, atol=5e-6)))
    print("max diff: " + str(np.fabs(api_data - auto_data).max()))

if __name__ == "__main__":
    main()
``````
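
I suspect small differences like this might just come from float32 round-off: float addition is not associative, so two mathematically equivalent reductions that sum in a different order can give slightly different results. A tiny NumPy illustration of the effect (not TF-specific):

``````
import numpy as np

np.random.seed(0)
x = np.random.rand(10000).astype(np.float32)

s_fwd = np.sum(x)        # sum in one order
s_rev = np.sum(x[::-1])  # same values, reversed order
print(s_fwd == s_rev)                    # often False
print(abs(float(s_fwd) - float(s_rev)))  # small nonzero difference
``````

That would also explain why I need a nonzero `atol` in the `np.allclose` comparison above.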

Does anyone have an idea about this issue?