BlurbGenreCollection-HMC
ValueError: Can not do batch_dot
Hi,
I'm trying to run the capsulenet classifier using the command below:
python main.py --mode train_validation --classifier capsule --lang EN --sequence_length 100 --learning_rate 0.001 --learning_decay 1
However, the create_model method throws an exception while constructing the model. The traceback is as follows:
Traceback (most recent call last):
File "main.py", line 400, in <module>
main()
File "main.py", line 299, in main
run()
File "main.py", line 321, in run
model = create_model(dev = True, preload = False)
File "main.py", line 371, in create_model
return model_capsule(dev, preload)
File "main.py", line 258, in model_capsule
args.dense_capsule_dim, args.n_channels, 3, dev)
File "/home/daan_vandennest/git/BlurbGenreCollection_Classification/code/networks.py", line 50, in create_model_capsule
name='digitcaps')(primarycaps)
File "/home/daan_vandennest/miniconda3/envs/capsnet/lib/python3.6/site-packages/keras/engine/base_layer.py", line 451, in __call__
output = self.call(inputs, **kwargs)
File "/home/daan_vandennest/git/BlurbGenreCollection_Classification/code/capsulelayers.py", line 119, in call
b += K.batch_dot(outputs, inputs_hat, [2, 3])
File "/home/daan_vandennest/miniconda3/envs/capsnet/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1261, in batch_dot
'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2))
ValueError: Can not do batch_dot on inputs with shapes (None, 131, 131, 2805, 16) and (None, 131, None, 2805, 16) with axes=[2, 3]. x.shape[2] != y.shape[3] (131 != 2805).
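For reference, K.batch_dot(x, y, axes=[a, b]) contracts axis a of x with axis b of y, so those two axes must have the same length; below is a minimal sketch with toy shapes (illustrative only, not the shapes used in this model) showing the rule that the error above is enforcing:

import numpy as np
from keras import backend as K

# Toy tensors: [batch, num_capsule, dim_capsule]
x = K.variable(np.zeros((2, 12, 16)))
y = K.variable(np.zeros((2, 12, 16)))

# Contract axis 2 of x with axis 2 of y -> per-sample dot products.
z = K.batch_dot(x, y, axes=[2, 2])
print(K.int_shape(z))  # (2, 12, 12)

# Mismatched contraction axes trigger the same kind of ValueError as above,
# e.g. axes=[2, 1] here would compare length 16 against length 12.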
I've made no changes to the code. The only difference is that I'm using plain tensorflow rather than tensorflow-gpu. Do you have any idea what might be causing this?
For completeness' sake I've added the output of pip freeze below:
absl-py==0.9.0
astor==0.8.1
beautifulsoup4==4.6.0
bleach==1.5.0
blis==0.2.4
boto==2.49.0
boto3==1.12.14
botocore==1.15.14
certifi==2019.11.28
chardet==3.0.4
cycler==0.10.0
cymem==2.0.3
cysignals==1.10.2
Cython==0.29.15
decorator==4.4.2
docutils==0.15.2
en-core-web-sm==2.1.0
future==0.18.2
gast==0.3.3
gensim==3.8.0
GPy==1.9.5
GPyOpt==1.2.5
graphviz==0.8.3
grpcio==1.27.2
h5py==2.8.0
html5lib==0.9999999
idna==2.9
jmespath==0.9.5
Keras==2.2.5
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
kiwisolver==1.1.0
Markdown==3.2.1
matplotlib==2.2.2
murmurhash==1.0.2
numpy==1.16.5
pandas==0.23.4
paramz==0.9.4
pathlib==1.0.1
pipenv==2018.11.26
plac==0.9.6
plumbum==1.6.6
preshed==2.0.1
protobuf==3.11.3
pydot==1.2.3
pyfasttext==0.4.5
pyparsing==2.4.6
python-dateutil==2.8.1
pytz==2019.3
PyYAML==5.3
regex==2017.4.5
requests==2.23.0
s3transfer==0.3.3
scikit-learn==0.19.1
scipy==1.1.0
six==1.14.0
smart-open==1.9.0
spacy==2.1.8
spyder==2.3.8
srsly==1.0.2
stop-words==2015.2.23.1
tensorboard==1.7.0
tensorflow==1.7.0
termcolor==1.1.0
thinc==7.0.8
tqdm==4.43.0
treetaggerwrapper==2.2.4
ujson==1.35
urllib3==1.25.8
virtualenv-clone==0.5.3
wasabi==0.6.0
Werkzeug==1.0.0
I ran into the same problem. It seems batch_dot() has been revised and no longer behaves the way it used to!
I modified call() in capsulelayers.py as follows:
def call(self, inputs, training=None):
    # inputs.shape=[None, input_num_capsule, input_dim_capsule]
    # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule]
    inputs_expand = K.expand_dims(inputs, 1)

    # Replicate num_capsule dimension to prepare being multiplied by W.
    # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule] (?, 12, 2805, 8)
    inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])

    # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
    # x.shape=[num_capsule, input_num_capsule, input_dim_capsule]
    # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
    # Regard the first two dimensions as the `batch` dimension, then
    # matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule].
    # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule] (?, 12, 2805, 16)
    # inputs_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, [2, 3]), elems=inputs_tiled)
    inputs_hat = K.map_fn(
        lambda x: tf.reshape(tf.matmul(self.W, K.expand_dims(x, 3)),
                             [self.num_capsule, self.input_num_capsule, self.dim_capsule]),
        elems=inputs_tiled)

    # Begin: Routing algorithm ---------------------------------------------------------------------#
    # The prior for the coupling coefficients, initialized as zeros.
    # b.shape = [None, self.num_capsule, self.input_num_capsule]
    b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

    assert self.routings > 0, 'The routings should be > 0.'
    for i in range(self.routings):
        # c.shape=[batch_size, num_capsule, input_num_capsule]
        c = tf.nn.softmax(b, dim=1)

        # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
        # Treat the first two dimensions as the `batch` dimension, then
        # matmul: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
        # outputs.shape=[None, num_capsule, dim_capsule]
        # outputs = squash(K.batch_dot(c, inputs_hat, [2, 2]))  # [None, 10, 16]
        c = tf.expand_dims(c, 2)
        z = tf.matmul(c, inputs_hat)
        outputs = squash(tf.reshape(z, [-1, self.num_capsule, self.dim_capsule]))

        if i < self.routings - 1:
            # outputs.shape = [None, num_capsule, dim_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # Treat the first two dimensions as the `batch` dimension, then
            # matmul: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
            # b.shape=[batch_size, num_capsule, input_num_capsule]
            # b += K.batch_dot(outputs, inputs_hat, [2, 3])
            outputs1 = tf.expand_dims(outputs, 3)
            x = tf.matmul(inputs_hat, outputs1)
            x = tf.reshape(x, [-1, self.num_capsule, self.input_num_capsule])
            b += x
    # End: Routing algorithm -----------------------------------------------------------------------#

    return outputs
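If it helps, here is a small standalone check (toy shapes and random values, purely illustrative) that the expand_dims + matmul + reshape pattern used above computes the dot product over dim_capsule that the old K.batch_dot(outputs, inputs_hat, [2, 3]) agreement term was meant to produce:

import numpy as np
import tensorflow as tf

num_capsule, input_num_capsule, dim_capsule = 4, 7, 5
outputs    = np.random.rand(2, num_capsule, dim_capsule).astype(np.float32)
inputs_hat = np.random.rand(2, num_capsule, input_num_capsule, dim_capsule).astype(np.float32)

# Replacement: contract dim_capsule via batched matmul, then drop the trailing singleton axis.
o = tf.expand_dims(tf.constant(outputs), 3)        # [2, 4, 5, 1]
agreement = tf.matmul(tf.constant(inputs_hat), o)  # [2, 4, 7, 1]
agreement = tf.reshape(agreement, [-1, num_capsule, input_num_capsule])

# Reference computed with plain numpy: dot product over the dim_capsule axis.
reference = np.einsum('bnd,bnid->bni', outputs, inputs_hat)

with tf.Session() as sess:
    np.testing.assert_allclose(sess.run(agreement), reference, rtol=1e-5)
    print('matmul replacement matches the einsum reference')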