clstm
clstm copied to clipboard
Code sample for training on the original dataset using the clstm Python bindings and OpenCV 2 (cv2)
I struggled a lot to get the clstm Python bindings working with the original dataset using OpenCV 2 and Python. I hope this code helps other people who are trying to use clstm from Python.
import clstm
import numpy as np
import os
from scipy.ndimage import filters
import cv2
def mktarget(transcript,noutput):
    """Build a one-hot target matrix with class 0 interleaved between labels.

    For a transcript of N class indices the result has 2*N+1 rows and
    `noutput` columns (float32). Even rows are one-hot on column 0; odd row
    2*i+1 is one-hot on column transcript[i].

    NOTE: a label equal to 0 is not rejected here, so such a row would be
    indistinguishable from the interleaved class-0 rows.
    """
    length = len(transcript)
    target = np.zeros((2 * length + 1, noutput), 'f')
    # Rows 0, 2, 4, ... all select column 0.
    target[0::2, 0] = 1
    # Rows 1, 3, 5, ... select the successive transcript labels.
    for row, label in zip(range(1, 2 * length, 2), transcript):
        target[row, label] = 1
    return target
def decode(pred, codec, threshold = .5):
    """Turn network outputs into a string by picking peaks between blanks.

    `pred` is indexed as pred[step, class, 0]; the class-0 column is treated
    as the "no character" signal. That column is smoothed with a Gaussian
    (sigma 2) and every strict local minimum of the smoothed signal that lies
    below `threshold` is taken as a character position. At each such position
    the arg-max class is looked up in `codec`.

    `codec` maps a class index to either an integer code point (converted
    with chr) or an already-decoded character string (used as is).

    Returns the concatenated characters, or "" when no position qualifies.
    """
    # Imported here from the public scipy.ndimage namespace instead of the
    # deprecated scipy.ndimage.filters module.
    from scipy.ndimage import gaussian_filter

    eps = gaussian_filter(pred[:, 0, 0], 2, mode='nearest')
    # Strict local minima of the smoothed class-0 signal. np.roll wraps
    # around, so the first and last steps are compared with each other.
    # The original compared `np.eps` (which does not exist) to the threshold.
    loc = (np.roll(eps, -1) > eps) & (np.roll(eps, 1) > eps) & (eps < threshold)
    classes = np.argmax(pred, axis=1)[:, 0]
    codes = classes[loc]

    chars = []
    for c in codes:
        entry = codec[c]
        try:
            chars.append(chr(entry))
        except TypeError:
            # Entry is already a character string, not a code point.
            chars.append(entry)
    return "".join(chars)
if __name__ == "__main__":
    # Training script: build the character codec from words.txt, load every
    # (image, transcript) pair under dataset/, train a bidirectional LSTM
    # against CTC-aligned targets, then save the network to sample.clstm.
    #
    # print() is always called with a single pre-formatted string so the
    # output is identical under Python 2 and Python 3.

    # ---- codec ----------------------------------------------------------
    with open("words.txt", "r") as words_file:
        # splitlines() leaves no trailing empty entry, so nothing to pop.
        lines = words_file.read().splitlines()
    # NOTE(review): the original built the alphabet from an undefined name
    # (`ans`), presumably the transcripts extracted from `lines`. Using
    # every character of words.txt gives a superset of that alphabet;
    # confirm, or narrow this to the transcript column.
    # Index 0 is reserved (empty string) because mktarget() writes class 0
    # for the rows between characters; a real character must never map to
    # it. Sorting makes the class numbering reproducible between runs.
    codec = [""] + sorted(set("".join(lines)))
    char_to_class = {ch: k for k, ch in enumerate(codec) if k > 0}

    ninput = 100
    noutput = len(codec)
    print("noutput :  %d" % noutput)

    # ---- network and learning rate --------------------------------------
    net = clstm.make_net_init(
        "bidi", "ninput=%d:nhidden=200:noutput=%d" % (ninput, noutput))
    net.setLearningRate(1e-4, 0.9)
    iteration = 200000

    # ---- dataset file names ---------------------------------------------
    dataset_dir = "dataset/"
    # Each sample is <name>.png with its transcript in <name>.bin.txt.
    img_name = sorted(name[:-len(".png")]
                      for name in os.listdir(dataset_dir)
                      if name.endswith(".png"))
    if not img_name:
        raise ValueError("no .png files found in %s" % dataset_dir)

    # ---- transcripts ----------------------------------------------------
    transcripts = []
    for i, name in enumerate(img_name):
        print("loading file %s percent complete"
              % (float(i) / float(len(img_name)) * 100))
        with open(os.path.join(dataset_dir, name + ".bin.txt"), "r") as transcript_file:
            # A trailing newline is not a glyph in the image; drop it.
            transcript_text = transcript_file.read().rstrip("\r\n")
        try:
            transcripts.append([char_to_class[ch] for ch in transcript_text])
        except KeyError as err:
            raise ValueError(
                "character %r in %s.bin.txt is not in the codec built from words.txt"
                % (err.args[0], name))

    # ---- training -------------------------------------------------------
    for i in range(iteration):
        print("%s %% complete" % (float(i) / float(iteration) * 100))
        # Pick one sample at random; the image and the transcript must both
        # use this index (the original indexed the image by the loop counter).
        index = np.random.randint(len(img_name))

        img_path = os.path.join(dataset_dir, img_name[index] + ".png")
        img = cv2.imread(img_path, 0)  # flag 0: load as single-channel grayscale
        if img is None:
            raise IOError("could not read image %s" % img_path)
        if img.shape[0] != ninput:
            raise ValueError("image %s has height %d, expected %d"
                             % (img_path, img.shape[0], ninput))

        # Network input is (width, ninput, 1): one pixel column per step.
        # The original reinterpreted a float64 buffer as float32 and
        # reshaped without transposing, which scrambled the pixel data.
        # NOTE(review): pixel values are passed through unscaled (0..255),
        # as in the original; confirm whether normalisation is wanted.
        xs = np.ascontiguousarray(img.T, dtype=np.float32)[:, :, np.newaxis]
        net.inputs.aset(xs)

        # forward propagation
        net.forward()

        # target sequence for this sample's transcript
        target = mktarget(transcripts[index], noutput)
        seq = clstm.Sequence()
        seq.aset(target.reshape(-1, noutput, 1))

        # CTC-align the target against the current network outputs
        aligned = clstm.Sequence()
        clstm.seq_ctc_align(aligned, net.outputs, seq)

        # output delta = aligned target - network output
        deltas = aligned.array() - net.outputs.array()
        net.d_outputs.aset(deltas)

        # backward propagation and weight update
        net.backward()
        net.update()

    # ---- save network ---------------------------------------------------
    clstm.save_net("sample.clstm", net)