DeepAlignmentNetwork
DeepAlignmentNetwork copied to clipboard
Separable Depthwise Layer?
Hi, heavy questioner is back.
today, I tried to apply popular separable dw conv layer introduced in theano 0.10.0(but not in lasagne) to DAN, but as I'm a newbie in theano, I got lost....
below is my custom layer code for "SeparableDepthWiseConvolutionLayer"
[ SeparableDepthWiseConvolutionLayer.py ]
import theano
from lasagne.layers import Layer
import lasagne.init
from theano.tensor.nnet.abstract_conv import separable_conv2d
import numpy
rng = numpy.random
class SeparableDepthWiseConvolutionLayer(Layer):
def __init__(self, input, input_shape, num_dw_channels=32, num_pw_filters=32, filter_size=[3, 3], stride=[1, 1], **kwargs):
self.input_shape=input_shape
self.num_dw_channels = num_dw_channels
self.num_pw_filters = num_pw_filters
self.filter_size=filter_size
self.stride=stride
# dimension : self.num_dw_channels, 1, self.filter_size[0], self.filter_size[1]
self.depthwise_filters= numpy.asarray(rng.randn(self.num_dw_channels * self.filter_size[0] * self.filter_size[1])).astype(theano.config.floatX)
# dimension : (self.num_pw_filters, self.num_dw_channels, 1, 1)
self.pointwise_filters= numpy.asarray(rng.randn(self.num_pw_filters * self.num_dw_channels)).astype(theano.config.floatX)
super(SeparableDepthWiseConvolutionLayer, self).__init__(input, **kwargs)
def get_output_shape_for(self, input_shape):
return (1, input_shape[1]*int(self.stride[0]), input_shape[2]//self.stride[0],input_shape[3]//self.stride[0])
def get_output_for(self, input, **kwargs):
x_sym = theano.tensor.tensor4('x')
dfilter_sym = theano.tensor.tensor4('d')
pfilter_sym = theano.tensor.tensor4('p')
sep_op = separable_conv2d(x_sym ,
dfilter_sym ,
pfilter_sym ,
self.num_dw_channels,
input_shape=self.input_shape,
depthwise_filter_shape=(self.num_dw_channels,1,self.filter_size[0],self.filter_size[1]),
pointwise_filter_shape=(self.num_pw_filters, self.num_dw_channels, 1, 1)
)
fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
output = fun(input ,self.depthwise_filters, self.pointwise_filters)
return output
and then, I replaced all existing lasagnes' conv+batchnorm layers in createCNN() with the new layer as below;
[ DANtraining.py ]
from SepDWConvLayer import SeparableDepthWiseConvolutionLayer as SPDWConv ... ... def createCNN(self): ... ...
net['s1_conv1_1'] = batch_norm(Conv2DLayer(net['input'], 64, 3, pad='same', W=GlorotUniform('relu'))) ->> net['s1_conv1_1'] = batch_norm(SPDWConv(net['s1_conv0_1'], net['s1_conv0_1'].output_shape, 32, 64,[3, 3], stride=[1, 1] )) net['s1_conv1_2'] = batch_norm(Conv2DLayer(net['s1_conv1_1'], 64, 3, pad='same', W=GlorotUniform('relu'))) ->> net['s1_conv1_2'] = batch_norm(SPDWConv(net['s1_conv1_1'], net['s1_conv1_1'].output_shape, 64, 128,[3, 3], stride=[2, 2] )) .. .. .. so on.
so the createCNN() looks like :
def createCNN(self):
net = {}
net['input'] = lasagne.layers.InputLayer(shape=(1, self.nChannels, self.imageHeight, self.imageWidth), input_var=self.data)
print("Input shape: {0}".format(net['input'].output_shape))
#STAGE 1
net['s1_conv0_1'] = batch_norm(Conv2DLayer(net['input'], 32, 3, stride=(1,1), pad='same', W=GlorotUniform('relu')))
print("s1_conv0_1 shape: {0}".format(net['s1_conv0_1'].output_shape))
net['s1_conv1_1'] = batch_norm(SPDWConv(net['s1_conv0_1'], net['s1_conv0_1'].output_shape, 32, 64,[3, 3], stride=[1, 1] ))
print("s1_conv1_1 shape: {0}".format(net['s1_conv1_1'].output_shape))
net['s1_conv1_2'] = batch_norm(SPDWConv(net['s1_conv1_1'], net['s1_conv1_1'].output_shape, 64, 128,[3, 3], stride=[2, 2] ))
print("s1_conv1_2 shape: {0}".format(net['s1_conv1_2'].output_shape))
net['s1_conv2_1'] = batch_norm(SPDWConv(net['s1_conv1_2'], net['s1_conv1_2'].output_shape,128, 128,[3, 3], stride=[1, 1] ))
print("s1_conv2_1 shape: {0}".format(net['s1_conv2_1'].output_shape))
net['s1_conv2_2'] = batch_norm(SPDWConv(net['s1_conv2_1'], net['s1_conv2_1'].output_shape,128, 256,[3, 3], stride=[2, 2] ))
print("s1_conv2_2 shape: {0}".format(net['s1_conv2_2'].output_shape))
net['s1_conv3_1'] = batch_norm(SPDWConv(net['s1_conv2_2'], net['s1_conv2_2'].output_shape,256, 256,[3, 3], stride=[1, 1] ))
print("s1_conv3_1 shape: {0}".format(net['s1_conv3_1'].output_shape))
net['s1_conv3_2'] = batch_norm(SPDWConv(net['s1_conv3_1'], net['s1_conv3_1'].output_shape,256, 512,[3, 3], stride=[2, 2] ))
print("s1_conv3_2 shape: {0}".format(net['s1_conv3_2'].output_shape))
net['s1_conv4_1'] = batch_norm(SPDWConv(net['s1_conv3_2'], net['s1_conv3_2'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
print("s1_conv4_1 shape: {0}".format(net['s1_conv4_1'].output_shape))
net['s1_conv4_2'] = batch_norm(SPDWConv(net['s1_conv4_1'], net['s1_conv4_1'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
print("s1_conv4_2 shape: {0}".format(net['s1_conv4_2'].output_shape))
net['s1_conv4_3'] = batch_norm(SPDWConv(net['s1_conv4_2'], net['s1_conv4_2'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
print("s1_conv4_3 shape: {0}".format(net['s1_conv4_3'].output_shape))
net['s1_conv4_4'] = batch_norm(SPDWConv(net['s1_conv4_3'], net['s1_conv4_3'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
print("s1_conv4_4 shape: {0}".format(net['s1_conv4_4'].output_shape))
net['s1_conv4_5'] = batch_norm(SPDWConv(net['s1_conv4_4'], net['s1_conv4_4'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
print("s1_conv4_5 shape: {0}".format(net['s1_conv4_5'].output_shape))
net['s1_conv5_1'] = batch_norm(SPDWConv(net['s1_conv4_5'], net['s1_conv4_5'].output_shape,512, 1024,[3, 3], stride=[2, 2]))
print("s1_conv5_1 shape: {0}".format(net['s1_conv5_1'].output_shape))
net['s1_conv5_2'] = batch_norm(SPDWConv(net['s1_conv5_1'], net['s1_conv5_1'].output_shape,1024,1024,[3, 3], stride=[1, 1]))
print("s1_conv5_2 shape: {0}".format(net['s1_conv5_2'].output_shape))
net['s1_pool6'] = lasagne.layers.Pool2DLayer(net['s1_conv5_2'], 1)
print("s1_pool6 shape: {0}".format(net['s1_pool6'].output_shape))
net['s1_fc1_dropout'] = lasagne.layers.DropoutLayer(net['s1_pool6'], p=0.5)
print("s1_fc1_dropout shape: {0}".format(net['s1_fc1_dropout'].output_shape))
net['s1_fc1'] = batch_norm(lasagne.layers.DenseLayer(net['s1_fc1_dropout'], num_units=256, W=GlorotUniform('relu')))
print("s1_fc1 shape: {0}".format(net['s1_fc1'].output_shape))
net['s1_output'] = lasagne.layers.DenseLayer(net['s1_fc1'], num_units=136, nonlinearity=None)
print("s1_output shape: {0}".format(net['s1_output'].output_shape))
net['s1_landmarks'] = LandmarkInitLayer(net['s1_output'], self.initLandmarks)
print("s1_landmarks shape: {0}".format(net['s1_landmarks'].output_shape))
for i in range(1, self.nStages):
self.addDANStage(i + 1, net)
net['output'] = net['s' + str(self.nStages) + '_landmarks']
return net
my questions are,
- above code makes an error in get_output_for : x_sym = theano.tensor.tensor4('x') " Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array? "
( I'm referencing the theano test code "test_abstract_conv.py", https://github.com/Theano/Theano/blob/8dccbe6e1000239f57006e556fe8f737bb717aba/theano/tensor/nnet/tests/test_abstract_conv.py
There is def test_interface2d(self): in line 1683, and they test it with real numpy array values for input and deptwise/pointwise filters...
self.x = np.array([[[[1, 2, 3, 4, 5], [3, 2, 1, 4, 5], [3, 3, 1, 3, 6], [5, 3, 2, 1, 1], [4, 7, 1, 2, 1]], [[3, 3, 1, 2, 6], [6, 5, 4, 3, 1], [3, 4, 5, 2, 3], [6, 4, 1, 3, 4], [2, 3, 4, 2, 5]]]]).astype(theano.config.floatX) self.depthwise_filter = np.array([[[[3, 2, 1], [5, 3, 2], [6, 4, 2]]], [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]], [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]], [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]]]).astype(theano.config.floatX) self.pointwise_filter = np.array([[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]).astype(theano.config.floatX) x_sym = theano.tensor.tensor4('x') dfilter_sym = theano.tensor.tensor4('d') pfilter_sym = theano.tensor.tensor4('p') sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, self.x.shape[1]) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
but in my code, I'm passing "input"(is it TensorVariable?) to the theano.function
def get_output_for(self, input, **kwargs): x_sym = theano.tensor.tensor4('x') dfilter_sym = theano.tensor.tensor4('d') pfilter_sym = theano.tensor.tensor4('p') sep_op = separable_conv2d(x_sym , dfilter_sym , pfilter_sym , ...) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') output = fun(input ,self.depthwise_filters, self.pointwise_filters) return output
another try also failed :
def __init__(self, input, input_shape, num_dw_channels=32, num_pw_filters=32, filter_size=[3, 3], stride=[1, 1],
**kwargs):
self.input=inputs
... ... def get_output_for(self, input, **kwargs): x_sym = theano.tensor.tensor4('x_sym') dfilter_sym = theano.tensor.tensor4('dfilter_sym') pfilter_sym = theano.tensor.tensor4('pfilter_sym') sep_op = separable_conv2d(x_sym , dfilter_sym, pfilter_sym,... ) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') output = fun(self.input ,self.depthwise_filters, self.pointwise_filters) return output
error : x_sym = theano.tensor.tensor4('x_sym') float() argument must be a string or a number
is there a way to pass the real value to theano.function to avoid this error? I thouht, I should use symbols to build a graph for compile.
-
do you think the theano's new separable_conv2d op(which uses Abstract2D class) can replace the existing Conv2DLayer as I did? "Abstract" seems to me, it presents just an interface, so user should implement the actual mothod. But when I followed the theano codes, it seems there is an actual implementation for depthwise + pointwise conv in abtract_conv.py (https://github.com/Theano/Theano/blob/4d46e410bc765e9e288996c7da693146df69e3b9/theano/tensor/nnet/abstract_conv.py).
-
what method could you suggest for initializing depthwise/pointwise weight?
thank you in advance!
Hi,
As for 1 in your custom layer you should
- pass the variable named input as the first parameter to separable_conv2d (instead of x_sym)
- as for the dfilter_sym and pfilter_sym variables, those should be learnable members of your class, look at the implementation of the Conv2DLayer here: https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/conv.py for tips.
As for 2: I think that abstract here is supposed to mean "non-standard convolution" rather than abstract in the object-oriented programming meaning.
As for 3: never tried these before, but maybe you could try the standard glorot initialization?
Marek