clstm icon indicating copy to clipboard operation
clstm copied to clipboard

Code sample: training clstm from Python on a custom dataset, loading images with OpenCV (cv2)

Open kendemu opened this issue 9 years ago • 0 comments

It took me a lot of effort to get the clstm Python bindings working with my own (custom) dataset, using OpenCV (cv2) to load the images. I hope this code helps other people who are trying to use clstm from Python.

import clstm
import numpy as np
import os
from scipy.ndimage import filters
import cv2

def mktarget(transcript, noutput):
    """Build the one-hot target matrix for a sequence of class indices.

    The result has ``2 * len(transcript) + 1`` rows and ``noutput`` columns
    (float32).  Even rows mark class 0, which acts as the separator between
    labels; odd row ``2 * k + 1`` marks the class of the k-th transcript entry.
    """
    rows = 2 * len(transcript) + 1
    target = np.zeros((rows, noutput), 'f')
    # Class 0 on every even row: before, between and after the labels.
    target[::2, 0] = 1
    for position, label in enumerate(transcript):
        target[2 * position + 1, label] = 1
    return target

def decode(pred, codec, threshold=.5):
    """Decode network outputs into a string.

    The class-0 activation ``pred[:, 0, 0]`` is smoothed with a Gaussian
    (sigma 2).  Every time step where the smoothed value is a strict local
    minimum and lies below ``threshold`` emits one character: the argmax
    class at that step, mapped through ``codec``.

    Args:
        pred: 3-D array indexed as ``pred[t, class, 0]``.
        codec: sequence mapping a class index to an integer code point
            (each entry is passed to ``chr``).
        threshold: upper bound on the smoothed class-0 activation for a
            local minimum to count as a character position.

    Returns:
        The decoded string; empty when no step qualifies.
    """
    # Imported locally from the public namespace; the `scipy.ndimage.filters`
    # submodule is deprecated.
    from scipy.ndimage import gaussian_filter

    eps = gaussian_filter(pred[:, 0, 0], 2, mode='nearest')
    # np.roll wraps around, so the first and last steps are compared with
    # each other when testing for a local minimum.
    loc = (np.roll(eps, -1) > eps) & (np.roll(eps, 1) > eps) & (eps < threshold)
    classes = np.argmax(pred, axis=1)[:, 0]
    return "".join(chr(codec[c]) for c in classes[loc])

def _read_lines(path):
    """Return the lines of the text file at *path*, without line endings."""
    with open(path, "r") as handle:
        return handle.read().splitlines()


def _build_codec(lines):
    """Return the class-index -> code-point table for the characters in *lines*.

    Index 0 is a placeholder: mktarget() uses class 0 as the separator
    between labels, so no real character may be assigned to it.  The
    remaining entries are sorted so the mapping is the same on every run.
    """
    return [0] + sorted(set(ord(ch) for ch in "".join(lines)))


def _load_input(path, ninput):
    """Load a grayscale image as a float32 array of shape (width, ninput, 1).

    Each image column becomes one step of the input sequence, so the image
    height must equal *ninput*.
    """
    img = cv2.imread(path, 0)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise IOError("could not read image: %s" % path)
    if img.shape[0] != ninput:
        raise ValueError("image %s has height %d, expected %d"
                         % (path, img.shape[0], ninput))
    # NOTE(review): pixel values are passed through unscaled (0..255), as the
    # original script intended; consider normalising to [0, 1] -- TODO confirm.
    columns = np.ascontiguousarray(img.T, dtype=np.float32)
    return columns.reshape(img.shape[1], ninput, 1)


def main():
    """Train a bidirectional clstm network on dataset/*.png and save it.

    Each ``dataset/<name>.png`` must have a transcript ``dataset/<name>.bin.txt``.
    The character set is taken from ``words.txt``; the trained network is
    written to ``sample.clstm``.
    """
    dataset_dir = "dataset"
    ninput = 100
    iteration = 200000

    codec = _build_codec(_read_lines("words.txt"))
    # Skip the placeholder at index 0 so no character maps to the blank class.
    class_of = {code: idx for idx, code in enumerate(codec) if idx}
    noutput = len(codec)
    print("noutput :  %s" % noutput)

    # define network and learning rate
    net = clstm.make_net_init(
        "bidi", "ninput=%d:nhidden=200:noutput=%d" % (ninput, noutput))
    net.setLearningRate(1e-4, 0.9)

    # input files: one entry per PNG, extension stripped
    suffix = ".png"
    img_name = sorted(name[:-len(suffix)]
                      for name in os.listdir(dataset_dir)
                      if name.endswith(suffix))
    if not img_name:
        raise IOError("no %s files found in %s" % (suffix, dataset_dir))

    # load transcripts
    transcripts = []
    for i, name in enumerate(img_name):
        print("loading file %s percent complete"
              % (float(i) / float(len(img_name)) * 100))
        with open(os.path.join(dataset_dir, name + ".bin.txt"), "r") as handle:
            # A trailing newline is not part of the label text.
            text = handle.read().rstrip("\r\n")
        transcripts.append([class_of[ord(ch)] for ch in text])

    # learning
    for i in range(iteration):
        print("%s %% complete" % (float(i) / float(iteration) * 100))
        # Sample one training example; image and transcript share this index.
        index = np.random.randint(len(img_name))
        xs = _load_input(
            os.path.join(dataset_dir, img_name[index] + suffix), ninput)
        net.inputs.aset(xs)
        # forward propagation
        net.forward()
        pred = net.outputs.array()
        target = mktarget(transcripts[index], noutput)
        seq = clstm.Sequence()
        seq.aset(target.reshape(-1, noutput, 1))
        # align the target to the network output with CTC
        aligned = clstm.Sequence()
        clstm.seq_ctc_align(aligned, net.outputs, seq)
        # output deltas: aligned target minus prediction
        deltas = aligned.array() - pred
        net.d_outputs.aset(deltas)
        # backward propagation and weight update
        net.backward()
        net.update()

    # save network
    clstm.save_net("sample.clstm", net)


if __name__ == "__main__":
    main()

kendemu avatar Mar 30 '16 08:03 kendemu