tf-densecrf
Does it support gradient back propagation?
I fed a trainable feature map into the proposed operator and computed the probability as its output. I then defined a loss on that probability and used an optimizer to minimize it. I expected the loss to drop as the iterations progressed, but it stayed unchanged the whole time. So I am wondering whether the operator supports gradient back-propagation.
The code is attached below, and the output looked like this:
Iter000, Loss: 136492.593750
Iter001, Loss: 136492.593750
Iter002, Loss: 136492.593750
Iter003, Loss: 136492.593750
Iter004, Loss: 136492.593750
Iter005, Loss: 136492.593750
Iter006, Loss: 136492.593750
Iter007, Loss: 136492.593750
Iter008, Loss: 136492.593750
Iter009, Loss: 136492.593750
Iter010, Loss: 136492.593750
Iter011, Loss: 136492.593750
Iter012, Loss: 136492.593750
Iter013, Loss: 136492.593750
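A quick way to check this (just a diagnostic sketch, reusing the loss and tvars built in the script below) would be:

# Diagnostic only: does any gradient flow back from `loss` to the conv2d weights?
grads = tf.gradients(loss, tvars)
for var, g in zip(tvars, grads):
    print(var.name, 'grad:', 'None' if g is None else g)

If tf.gradients raises a LookupError, no gradient function is registered for the custom filter op; if it returns None for a variable, the gradient path to that variable is cut somewhere.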
Code:
import tensorflow as tf
import tensorflow.contrib.slim as slim
# NOTE: make sure to build the ops
import filter_ops
import matplotlib.pyplot as plt
import pickle
import numpy as np
import timeit
import os, cv2
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
def _shape_2d(input):
    shape = tf.shape(input)
    n, h0, w0, c = input.get_shape().as_list()
    _, h1, w1, _ = [shape[i] for i in range(len(input.get_shape()))]
    return n, h1 if h0 is None else h0, w1 if w0 is None else w0, c


def _conv2d_weights(x, out_dim, kernel_size, stddev, is_deconv=False, with_b=True):
    c = _shape_2d(x)[-1]
    w_shape = [kernel_size[0], kernel_size[1]]
    w_shape += [out_dim, c] if is_deconv else [c, out_dim]
    W = tf.get_variable('w', w_shape, initializer=tf.random_normal_initializer(stddev=stddev))
    b = tf.get_variable('b', [out_dim], initializer=tf.constant_initializer(0.0)) if with_b else None
    return W, b


def conv2d(x, output_dim, kernel_size=(3, 3), stride_size=(1, 1), padding='SAME', stddev=0.02, name='conv2d', with_b=True):
    with tf.variable_scope(name):
        W, b = _conv2d_weights(x, output_dim, kernel_size, stddev, False, with_b)
        conv = tf.nn.conv2d(x, W, strides=[1, stride_size[0], stride_size[1], 1], padding=padding)
        return conv if b is None else tf.nn.bias_add(conv, b)


def compute_spatial(height, width):
    # TODO: is this a bottleneck?
    xs = tf.tile(tf.reshape(tf.range(0, width), [1, width, 1]), [height, 1, 1])
    ys = tf.tile(tf.reshape(tf.range(0, height), [height, 1, 1]), [1, width, 1])
    return tf.cast(tf.concat([xs, ys], 2), tf.float32)


def meanfield_op(unary,
                 features_sp, features_bl,
                 kernel_sp, kernel_bl, kernel_compat,
                 num_iters=10, step=1.0, norm_eps=1e-20,
                 name='meanfield'):
    with tf.variable_scope(name):
        N = unary.shape[0]
        norm_feed = tf.ones([N, 1], dtype=tf.float32)
        # TODO: should we make a batch version?
        lattice_sp = filter_ops.permuto_init(features_sp)
        norm_sp = 1.0 / (norm_eps + filter_ops.ph_filter(norm_feed, lattice_sp))
        lattice_bl = filter_ops.permuto_init(features_bl)
        norm_bl = 1.0 / (norm_eps + filter_ops.ph_filter(norm_feed, lattice_bl))

        def _meanfield_iter(prev, it):
            prob = slim.softmax(prev)
            msg_sp_raw = filter_ops.ph_filter(prob, lattice_sp)
            msg_sp = tf.matmul(msg_sp_raw * norm_sp, kernel_sp)
            msg_bl_raw = filter_ops.ph_filter(prob, lattice_bl)
            msg_bl = tf.matmul(msg_bl_raw * norm_bl, kernel_bl)
            # TODO: we can even weight these between each other
            pairwise = tf.matmul(msg_sp + msg_bl, kernel_compat)
            nat_up = unary - pairwise
            return (1.0 - step) * prev + step * nat_up

        nats = tf.foldl(_meanfield_iter,
                        tf.range(num_iters),
                        unary)
        return nats


def main():
    H = W = 256
    NC = 2
    init_seg = np.random.random([10, NC, H, W])  # np.load('data/segmentations.npy')
    im_bgr = np.random.random([10, H, W, 3])  # np.load('data/images.npy')
    init_seg = init_seg.reshape([-1, NC, H, W])
    im_bgr = im_bgr.reshape([-1, H, W, 3])
    im_rgb = im_bgr[:, :, :, ::-1]
    init_seg = init_seg[0]
    init_seg = init_seg.transpose((1, 2, 0))
    im_rgb = im_rgb[0]
    OH, OW, NC = init_seg.shape
    height, width, num_classes = OH, OW, NC

    ph_init_seg = tf.placeholder(tf.float32, [OH, OW, NC], name='init_seg')
    ph_rgb = tf.placeholder(tf.float32, [OH, OW, 3], name='rgb')
    ph_sxy_sp = tf.placeholder(tf.float32, [])
    ph_sxy_bl = tf.placeholder(tf.float32, [])
    ph_srgb_bl = tf.placeholder(tf.float32, [])

    kernel_sp = 1.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))
    kernel_bl = 3.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))
    kernel_compat = -1.0 * tf.diag(tf.ones(num_classes, dtype=np.float32))

    features_sp = tf.reshape(compute_spatial(height, width) / ph_sxy_sp, [height * width, 2])
    features_bl = tf.reshape(tf.concat(axis=2, values=[compute_spatial(height, width) / ph_sxy_bl, ph_rgb / ph_srgb_bl]), [height * width, 2 + 3])

    init_seg0 = tf.reshape(ph_init_seg, [1, height, width, num_classes])
    init_seg1 = conv2d(init_seg0, num_classes)
    init_seg2 = tf.reshape(init_seg1, [height * width, num_classes])

    nats = meanfield_op(init_seg2, features_sp, features_bl, kernel_sp, kernel_bl, kernel_compat, num_iters=10)
    probs = nats
    probs = tf.reshape(probs, [OH, OW, NC])
    loss = tf.reduce_sum(probs * ph_init_seg)

    opt = tf.train.AdamOptimizer(10.1)
    tvars = tf.trainable_variables()
    for tvar in tvars:
        print(tvar.name)
    grad = opt.compute_gradients(loss, var_list=tvars)
    appgrad = opt.apply_gradients(grad)
    print(grad)

    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        for t in range(100):
            feed_dict = {
                ph_init_seg: init_seg,
                ph_rgb: im_rgb,
                ph_sxy_sp: 1.0,
                ph_sxy_bl: 3.0,
                ph_srgb_bl: 60.0
            }
            _, lossval, probsval = sess.run([appgrad, loss, probs], feed_dict=feed_dict)
            print('Iter%03d, Loss: %f' % (t, lossval))


if __name__ == '__main__':
    main()
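For reference, my understanding is that a custom TensorFlow op only back-propagates if a gradient function has been registered for its op type (or it is explicitly marked as not differentiable, in which case the gradient through it is silently None). The sketch below shows the usual TF1 registration pattern purely as an illustration: the op type name 'PermutoFilter', the input ordering, and the gradient formula are my own assumptions, not the actual implementation behind filter_ops.ph_filter.

import filter_ops
from tensorflow.python.framework import ops

# Hypothetical sketch of a gradient registration for a custom filter op.
# 'PermutoFilter' is a placeholder op type name; the formula (filtering the
# incoming gradient with the same lattice, which holds for a symmetric
# Gaussian kernel) is illustrative, not this repo's actual code.
@ops.RegisterGradient('PermutoFilter')
def _permuto_filter_grad(op, grad):
    values, lattice = op.inputs
    grad_values = filter_ops.ph_filter(grad, lattice)
    return [grad_values, None]  # no gradient w.r.t. the lattice handle

If something like this is missing, or the op is marked as not differentiable, the gradient through the mean-field iterations would be cut, which could explain why the loss never changes.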