Attention-OCR
How can I use this model to train on other languages, such as Chinese?
Did you fix this?
@Lzc6996 Did you fix this? :)
I guess just collecting images and their text labels will do.
If you have your own dataset, you need to handle the data generation yourself and modify the image size.
I trained successfully on Korean. My edits are as follows:
0. Labels / ord(c) values: Korean starts at 44032 (10 ~ 251 characters); ',' is 44 and '.' is 46; '0'-'9' are 48-57, 'A'-'Z' are 65-90, 'a'-'z' are 97-122.
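For reference, these code points can be checked directly in Python (they are standard Unicode/ASCII values):

```python
# First Hangul syllable, comma, period:
print(ord('가'), ord(','), ord('.'))  # 44032 44 46
# Digits and Latin letters:
print(ord('0'), ord('9'), ord('A'), ord('Z'), ord('a'), ord('z'))  # 48 57 65 90 97 122
```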
data_gen.py:

```python
for c in lex:
    #assert 64 < ord(c) < num_char or 47 < ord(c) < 58
    #assert 96 < ord(c) < 123 or 47 < ord(c) < 58
    #print('c', ord(c))
    fp.write(img_path + '\t' + lex + '\t')
    fp.write(str(ord(c)))
    word.append(ord(c) - 44032 - 43 + 43 + 150 + 3 if ord(c) > 44031
                else ord(c) - 43 + 3)
    fp.write('\n')
fp.close()
'''
for c in lex:
    #assert 96 < ord(c) < num_char or 47 < ord(c) < 58
    #assert 96 < ord(c) < 123 or 47 < ord(c) < 58
    print('c', ord(c))
    word.append(ord(c) - 97 + 13 if ord(c) > 96 else ord(c) - 48 + 3)
'''
word.append(self.EOS)
word = np.array(word, dtype=np.int32)
word = np.array(
    [self.GO] +
    [ord(c) - 44032 - 43 + 43 + 150 + 3 if ord(c) > 44031  # Korean characters start here
     else ord(c) - 43 + 3
     for c in lex] +
    [self.EOS], dtype=np.int32)
```
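To make the arithmetic concrete, here is a minimal sketch of the mapping above, with the constants copied from the snippet (the helper name is hypothetical, and the simplified forms in the comments are my own reading):

```python
def encode_char(c):
    # Hangul syllables start at 44032; everything else is ASCII from ',' (44) up.
    if ord(c) > 44031:
        return ord(c) - 44032 - 43 + 43 + 150 + 3  # i.e. ord(c) - 44032 + 153
    return ord(c) - 43 + 3                          # i.e. ord(c) - 40

print(encode_char('가'))  # 153 (first Hangul syllable)
print(encode_char('0'))   # 8
print(encode_char(','))   # 4  (indices 0-2 are reserved for PADDING/GO/EOS)
```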
model.py:

```python
def visualize_attention(self, filename, attentions, output_valid,
                        ground_valid, flag_incorrect, real_len):
    # omit......
    with open(os.path.join(output_dir, 'word.txt'), 'w', encoding='utf8') as fword:
        aaa = [chr(c - 3 - 150 - 43 + 44032 + 43).encode('utf8', 'ignore')
               if c - 3 - 150 - 43 + 44032 + 43 > 44031
               else chr(c - 3 + 43).encode('utf8', 'ignore')
               for c in ground_valid]
        fword.write(' '.join(map(bytes.decode, aaa)))
        fword.write("\n")
        aaa = [chr(c - 3 - 150 - 43 + 44032 + 43).encode('utf8', 'ignore')
               if c - 3 - 150 - 43 + 44032 + 43 > 44031
               else chr(c - 3 + 43).encode('utf8', 'ignore')
               for c in output_valid]
        fword.write(' '.join(map(bytes.decode, aaa)))
        print('fword-o aaa', aaa)
```
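The inline arithmetic here is just the inverse of the encoding in data_gen.py; factored out as a sketch (same constants, hypothetical helper name):

```python
def decode_index(c):
    # Inverse of the encoding above: indices that map back into the
    # Hangul block decode as Korean syllables, the rest as ASCII.
    code = c - 3 - 150 - 43 + 44032 + 43  # i.e. c + 43879
    if code > 44031:
        return chr(code)
    return chr(c - 3 + 43)                 # i.e. chr(c + 40)

print(decode_index(153))  # 가
print(decode_index(8))    # 0
```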
exp_config.py:

```python
LOAD_MODEL = True
OLD_MODEL_VERSION = False
TARGET_VOCAB_SIZE = 13500 + 256 + 26 + 3 + 26 + 10 + 3  # 0: PADDING, 1: GO, 2: EOS, >2: 0-9, a-z
```
The previous method produces low accuracy because I didn't assign indices based on the number of training characters and set TARGET_VOCAB_SIZE larger than needed. I got a better result by assigning an index to each training character and setting TARGET_VOCAB_SIZE to exactly that number.
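As a minimal sketch of what "exactly that number" means, assuming a label file with one character per line (the path is the one used in the patch below):

```python
import io

with io.open('../labels/bank_labelsS.txt', 'r', encoding='utf-8') as f:
    labels = f.read().splitlines()

# Indices 0-2 are PADDING, GO and EOS, so each label gets index i + 3
# and the output vocabulary is exactly len(labels) + 3.
TARGET_VOCAB_SIZE = len(labels) + 3
```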
Please refer to https://www.github.com/kspook/Attention-OCR-1/
Detailed change logs are as follows:
1. data_gen.py

```python
# line 10
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
DEFAULT_LABEL_FILE = os.path.join(SCRIPT_PATH, '../labels/bank_labelsS.txt')
```
```python
# line 118
# 'a': 97, '0': 48
word = [self.GO]
# ',': 44, '.': 46, 0-9: 48-57, A-Z: 65-90, a-z: 97-122
try:
    fp = open('outputs.txt', 'w+')
except IOError:
    print('could not open file outputs.txt')
    quit()
label_file = DEFAULT_LABEL_FILE
with io.open(label_file, 'r', encoding='utf-8') as f:
    labels = f.read().splitlines()
for c in lex:
    print('c ord(c)', c, ord(c))
    for i, l in enumerate(labels):
        #print('i l', i, l)
        if c == l:
            n = i + 3
            #print('i n : ', i, n)
            word.append(n)
        '''
        else:
            print('exit')
            exit()
        '''
word.append(self.EOS)
#print('ord(c), c: ', ord(c), c)
#print('word:', word)
return img_bw, word
```
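As a side note, the nested scan over labels repeats for every character; a one-time dictionary makes the same lookup O(1). A self-contained sketch (the label list here is a stand-in, not the actual file contents):

```python
GO, EOS = 1, 2

labels = ['가', '나', '0', '1']  # stand-in for the label file contents
char_to_idx = {l: i + 3 for i, l in enumerate(labels)}  # 0-2 reserved

lex = '가0'
word = [GO] + [char_to_idx[c] for c in lex] + [EOS]
print(word)  # [1, 3, 5, 2]
```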
2. bucketdata.py

```python
# line 24
label_file = DEFAULT_LABEL_FILE
with io.open(label_file, 'r', encoding='utf-8') as f:
    labels = f.read().splitlines()
#print('self, label, filename')
#print(self, label, filename)
for i, c in enumerate(label):
    if c > 2:
        for j, l in enumerate(labels):
            if c == ord(l):
                n = j + 3
                label[i] = n
```
3. model.py, lines 426-428, changed in a similar way to data_gen.py above:
```python
label_file = DEFAULT_LABEL_FILE
with io.open(label_file, 'r', encoding='utf-8') as f:
    labels = f.read().splitlines()
labels_list = enumerate(labels)
#print(labels_list(1))
#print(labels[1])
with open(os.path.join(output_dir, 'word.txt'), 'w', encoding='utf8') as fword:
    gv = ground_valid
    for i, c in enumerate(ground_valid):
        #print('c ord(c)', c, ord(c))
        for j, l in enumerate(labels):
            #print('j l', j, l)
            if (c - 3) == j:
                gv[i] = l
    print('aaa ground valid', gv, ground_valid)
    fword.write(' '.join(gv))
    fword.write('\n')
    ov = output_valid
    for i, c in enumerate(output_valid):
        for j, l in enumerate(labels):
            if (c - 3) == j:
                ov[i] = l
    print('aaa output valid', ov, output_valid)
    fword.write(' '.join(ov))
    #fword.write(' '.join([chr(c - 13 + 97) if c - 13 + 97 > 96 else chr(c - 3 + 48) for c in ground_valid]) + '\n')
    #fword.write(' '.join([chr(c - 13 + 97) if c - 13 + 97 > 96 else chr(c - 3 + 48) for c in output_valid]))
```
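Since labels is already ordered by index, the inner scan in both loops reduces to a direct lookup; a one-line sketch (skipping the reserved indices 0-2, which the nested loop simply leaves untouched):

```python
gv = [labels[c - 3] for c in ground_valid if c > 2]
```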
@kspook How do you recognize Korean? I tried your method, but the output is as below:

```
ground valid ['두'] ['두'] output valid [] []
ground valid ['5', '7', '3', '8'] ['5', '7', '3', '8'] output valid [] []
```

What is the problem?
Did you try my first suggestion? Try my second one.
My first suggestion trains with more output classes than the number of characters I actually want to learn, so the results come out wrong.
In my second approach, the number of classes is the same as the number of characters I trained, so the results come out as trained.
Otherwise, you may be able to fix it by printing out or logging the variables during testing.
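For example, a minimal logging sketch, assuming it is placed inside visualize_attention where output_valid and ground_valid are in scope (the log file name is arbitrary):

```python
import logging

logging.basicConfig(filename='test_debug.log', level=logging.DEBUG)

# Log the raw index values before they are converted back to labels,
# to see whether the model is emitting empty predictions.
logging.debug('ground_valid raw indices: %s', ground_valid)
logging.debug('output_valid raw indices: %s', output_valid)
```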