tf-densecrf
Does it support gradient back propagation?
I fed a trainable feature map into the proposed operator and computed the probability as its output. I then defined a loss on that probability and used an optimizer to minimize it. I expected the loss to drop as the iterations progressed, but it stayed unchanged the whole time. So I am wondering whether the operator supports gradient back-propagation.
The code is attached below, and the output looked like this:
Iter000, Loss: 136492.593750
Iter001, Loss: 136492.593750
Iter002, Loss: 136492.593750
Iter003, Loss: 136492.593750
Iter004, Loss: 136492.593750
Iter005, Loss: 136492.593750
Iter006, Loss: 136492.593750
Iter007, Loss: 136492.593750
Iter008, Loss: 136492.593750
Iter009, Loss: 136492.593750
Iter010, Loss: 136492.593750
Iter011, Loss: 136492.593750
Iter012, Loss: 136492.593750
Iter013, Loss: 136492.593750
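A quick way to check this (just a diagnostic sketch, reusing the loss and tvars built in the script below) would be:

# Diagnostic only: does any gradient flow back from `loss` to the conv2d weights?
grads = tf.gradients(loss, tvars)
for var, g in zip(tvars, grads):
    print(var.name, 'grad:', 'None' if g is None else g)

If tf.gradients raises a LookupError, no gradient function is registered for the custom filter op; if it returns None for a variable, the gradient path to that variable is cut somewhere.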
Code:
import tensorflow as tf
import tensorflow.contrib.slim as slim
# NOTE: make sure to build the ops
import filter_ops
import matplotlib.pyplot as plt
import pickle
import numpy as np
import timeit
import os, cv2
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
def _shape_2d(input):
    shape = tf.shape(input)
    n, h0, w0, c = input.get_shape().as_list()
    _, h1, w1, _ = [shape[i] for i in range(len(input.get_shape()))]
    return n, h1 if h0 is None else h0, w1 if w0 is None else w0, c


def _conv2d_weights(x, out_dim, kernel_size, stddev, is_deconv=False, with_b=True):
    c = _shape_2d(x)[-1]
    w_shape = [kernel_size[0], kernel_size[1]]
    w_shape += [out_dim, c] if is_deconv else [c, out_dim]
    W = tf.get_variable('w', w_shape, initializer=tf.random_normal_initializer(stddev=stddev))
    b = tf.get_variable('b', [out_dim], initializer=tf.constant_initializer(0.0)) if with_b else None
    return W, b


def conv2d(x, output_dim, kernel_size=(3, 3), stride_size=(1, 1), padding='SAME', stddev=0.02, name='conv2d', with_b=True):
    with tf.variable_scope(name):
        W, b = _conv2d_weights(x, output_dim, kernel_size, stddev, False, with_b)
        conv = tf.nn.conv2d(x, W, strides=[1, stride_size[0], stride_size[1], 1], padding=padding)
        return conv if b is None else tf.nn.bias_add(conv, b)


def compute_spatial(height, width):
    # TODO: is this a bottleneck?
    xs = tf.tile(tf.reshape(tf.range(0, width), [1, width, 1]), [height, 1, 1])
    ys = tf.tile(tf.reshape(tf.range(0, height), [height, 1, 1]), [1, width, 1])
    return tf.cast(tf.concat([xs, ys], 2), tf.float32)


def meanfield_op(unary,
                 features_sp, features_bl,
                 kernel_sp, kernel_bl, kernel_compat,
                 num_iters=10, step=1.0, norm_eps=1e-20,
                 name='meanfield'):
    with tf.variable_scope(name):
        N = unary.shape[0]
        norm_feed = tf.ones([N, 1], dtype=tf.float32)
        # TODO: should we make a batch version?
        lattice_sp = filter_ops.permuto_init(features_sp)
        norm_sp = 1.0 / (norm_eps + filter_ops.ph_filter(norm_feed, lattice_sp))
        lattice_bl = filter_ops.permuto_init(features_bl)
        norm_bl = 1.0 / (norm_eps + filter_ops.ph_filter(norm_feed, lattice_bl))

        def _meanfield_iter(prev, it):
            prob = slim.softmax(prev)
            msg_sp_raw = filter_ops.ph_filter(prob, lattice_sp)
            msg_sp = tf.matmul(msg_sp_raw * norm_sp, kernel_sp)
            msg_bl_raw = filter_ops.ph_filter(prob, lattice_bl)
            msg_bl = tf.matmul(msg_bl_raw * norm_bl, kernel_bl)
            # TODO: we can even weight these between each other
            pairwise = tf.matmul(msg_sp + msg_bl, kernel_compat)
            nat_up = unary - pairwise
            return (1.0 - step) * prev + step * nat_up

        nats = tf.foldl(_meanfield_iter,
                        tf.range(num_iters),
                        unary)
        return nats


def main():
    H = W = 256
    NC = 2
    init_seg = np.random.random([10, NC, H, W])  # np.load('data/segmentations.npy')
    im_bgr = np.random.random([10, H, W, 3])  # np.load('data/images.npy')
    init_seg = init_seg.reshape([-1, NC, H, W])
    im_bgr = im_bgr.reshape([-1, H, W, 3])
    im_rgb = im_bgr[:, :, :, ::-1]
    init_seg = init_seg[0]
    init_seg = init_seg.transpose((1, 2, 0))
    im_rgb = im_rgb[0]
    OH, OW, NC = init_seg.shape
    height, width, num_classes = OH, OW, NC

    ph_init_seg = tf.placeholder(tf.float32, [OH, OW, NC], name='init_seg')
    ph_rgb = tf.placeholder(tf.float32, [OH, OW, 3], name='rgb')
    ph_sxy_sp = tf.placeholder(tf.float32, [])
    ph_sxy_bl = tf.placeholder(tf.float32, [])
    ph_srgb_bl = tf.placeholder(tf.float32, [])

    kernel_sp = 1.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))
    kernel_bl = 3.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))
    kernel_compat = -1.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))

    features_sp = tf.reshape(compute_spatial(height, width) / ph_sxy_sp, [height * width, 2])
    features_bl = tf.reshape(tf.concat(axis=2, values=[compute_spatial(height, width) / ph_sxy_bl, ph_rgb / ph_srgb_bl]), [height * width, 2 + 3])

    init_seg0 = tf.reshape(ph_init_seg, [1, height, width, num_classes])
    init_seg1 = conv2d(init_seg0, num_classes)
    init_seg2 = tf.reshape(init_seg1, [height * width, num_classes])

    nats = meanfield_op(init_seg2, features_sp, features_bl, kernel_sp, kernel_bl, kernel_compat, num_iters=10)
    probs = nats
    probs = tf.reshape(probs, [OH, OW, NC])
    loss = tf.reduce_sum(probs * ph_init_seg)

    opt = tf.train.AdamOptimizer(10.1)
    tvars = tf.trainable_variables()
    for tvar in tvars:
        print(tvar.name)
    grad = opt.compute_gradients(loss, var_list=tvars)
    appgrad = opt.apply_gradients(grad)
    print(grad)

    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        for t in range(100):
            feed_dict = {
                ph_init_seg: init_seg,
                ph_rgb: im_rgb,
                ph_sxy_sp: 1.0,
                ph_sxy_bl: 3.0,
                ph_srgb_bl: 60.0
            }
            _, lossval, probsval = sess.run([appgrad, loss, probs], feed_dict=feed_dict)
            print('Iter%03d, Loss: %f' % (t, lossval))


if __name__ == '__main__':
    main()
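For reference, my understanding is that a custom TensorFlow op only back-propagates if a gradient function has been registered for its op type (or it is explicitly marked as not differentiable, in which case the gradient through it is silently None). The sketch below shows the usual TF1 registration pattern purely as an illustration: the op type name 'PermutoFilter', the input ordering, and the gradient formula are my own assumptions, not the actual implementation behind filter_ops.ph_filter.

import filter_ops
from tensorflow.python.framework import ops

# Hypothetical sketch of a gradient registration for a custom filter op.
# 'PermutoFilter' is a placeholder op type name; the formula (filtering the
# incoming gradient with the same lattice, which holds for a symmetric
# Gaussian kernel) is illustrative, not this repo's actual code.
@ops.RegisterGradient('PermutoFilter')
def _permuto_filter_grad(op, grad):
    values, lattice = op.inputs
    grad_values = filter_ops.ph_filter(grad, lattice)
    return [grad_values, None]  # no gradient w.r.t. the lattice handle

If something like this is missing, or the op is marked as not differentiable, the gradient through the mean-field iterations would be cut, which could explain why the loss never changes.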