deepnet
deepnet copied to clipboard
Question about CNN backpropagation
Hi, thanks for your work. I learned a lot from your blog and code!
In your gradient test code, I found that there may be something wrong in the backpropagation for dX of the CNN. The test code is as below, and the data used in the code are attached here: npy.zip
# Load the saved test fixtures: weights, bias, upstream gradient, and input.
w = np.load('w.npy')
b = np.load('b.npy')
dout = np.load('dout.npy')
x = np.load('x.npy')

# Build a 3x3, stride-1, pad-1 conv layer over 1x28x28 inputs and install
# the saved parameters, so the analytic and numeric checks use identical state.
c_layer = Conv((1, 28, 28), n_filter=32, h_filter=3, w_filter=3,
               stride=1, padding=1)
c_layer.W = w
c_layer.b = b

# Numeric gradients. NOTE: the w/b lambdas ignore their argument on purpose —
# numerical_gradient_array perturbs the passed array in place, and c_layer.W /
# c_layer.b alias those same arrays, so forward() sees every perturbation.
dx_num = numerical_gradient_array(c_layer.forward, x, dout)
dw_num = numerical_gradient_array(lambda w: c_layer.forward(x), w, dout)
db_num = numerical_gradient_array(lambda b: c_layer.forward(x), b, dout)

# Analytic gradients from the layer's own backward pass.
out = c_layer.forward(x)
dx, grads = c_layer.backward(dout)
dw, db = grads

print("Testing backward pass of Conv Layer")
print("dX error: ", rel_error(dx, dx_num))
print("dW error: ", rel_error(dw, dw_num))
print("db error: ", rel_error(db, db_num))
The results are as below:
Testing backward pass of Conv Layer dX error: 1.0 dW error: 4.938012368517188e-11 db error: 2.0764855776951717e-07
And in another test script, we can see that the average difference of dW (summed per run, averaged over 10 runs) is about -0.074.
def run():
    """Build a fresh CNN, backprop one two-sample MNIST mini-batch, and
    return the summed difference between the analytic conv-weight gradient
    and its numeric estimate.

    Returns:
        float: np.sum(analytic_dW - numeric_dW) for the conv layer.
    """

    def make_mnist_cnn(X_dim, num_class):
        # Layer stack: conv -> relu -> maxpool -> flatten -> fully-connected.
        conv = Conv(X_dim, n_filter=32, h_filter=3,
                    w_filter=3, stride=1, padding=1)
        relu_conv = ReLU()
        maxpool = Maxpool(conv.out_dim, size=2, stride=1)
        flat = Flatten()
        fc = FullyConnected(np.prod(maxpool.out_dim), num_class)
        return [conv, relu_conv, maxpool, flat, fc]

    mnist_dims = (1, 28, 28)
    cnn = CNN(make_mnist_cnn(mnist_dims, num_class=10))

    # Local import keeps the heavy keras dependency out of module import time.
    import keras
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    num_classes = 10
    # Reshape to NCHW with a single channel.
    x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1], x_train.shape[2])
    x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1], x_test.shape[2])
    y_train = keras.utils.to_categorical(y_train, num_classes)

    # Two-sample mini-batch; y stays as integer labels for SoftmaxLoss.
    x = x_test[:2]
    y = y_test[:2]

    # Analytic gradients via the network's own backward pass.
    y_pred = cnn.forward(x)
    loss, dout = SoftmaxLoss(y_pred, y)
    grads = cnn.backward(dout=dout)

    # Numeric gradient of the conv weights. cw aliases the live parameter
    # array, so the in-place perturbation done by numerical_gradient_array is
    # seen by cnn.forward. (The original `cnn.params[0][0] = cw` line was a
    # no-op self-assignment and has been removed.)
    cw = cnn.params[0][0]
    dw_num = numerical_gradient_array(lambda w: cnn.forward(x), cw, dout)

    # grads[4][0] is taken as the analytic dW of the conv layer — presumably
    # grads are collected in reverse layer order; TODO confirm in CNN.backward.
    return np.sum(grads[4][0] - dw_num)


# Average the discrepancy over 10 runs (each call builds a new CNN, so the
# weights are presumably re-initialized every time — verify in Conv.__init__).
dvis = [run() for _ in range(10)]
np.mean(dvis)
output:
-0.0741057004071033