text-detection-ctpn
How to apply OCR?
Hi, I want to know how to extract the detected text with text-detection-ctpn.
https://github.com/eragonruan/text-detection-ctpn/issues/120#issuecomment-488273168
@baronpalacios Did you figure out how to apply OCR?
Applying OCR is quite easy: demo.py already writes out every rect with its coordinates. What you need to do is crop the image using those coordinates and then run Tesseract on each crop to do the OCR part.
I replaced the write-to-text-file part with Tesseract, but Tesseract's accuracy is poor. Here is the code:
import os
import pytesseract as tess

tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract'  # path to the tesseract binary

# FLAGS, im_fn, img and boxes come from demo.py; each box holds the four
# corner coordinates of a detected text region plus its score.
with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
          "w") as f:
    for box in boxes:
        x1, y1, x2, y2, x3, y3, x4, y4, threshold = box.tolist()
        # Crop the axis-aligned bounding rectangle of the detected quad.
        ymin = min(y1, y2, y3, y4)
        ymax = max(y1, y2, y3, y4)
        xmin = min(x1, x2, x3, x4)
        xmax = max(x1, x2, x3, x4)
        detected_text_img = img[int(ymin):int(ymax), int(xmin):int(xmax)].copy()
        ocr_text = tess.image_to_string(detected_text_img)
        f.write(ocr_text + os.linesep)
Hi Max, can you tell me how to extract the words after running demo.py? Which file should I change to extract the words?
Here is the full source code of my demo.py
# coding=utf-8
import os
import shutil
import sys
import time
import cv2
import numpy as np
import tensorflow as tf
sys.path.append(os.getcwd())
from nets import model_train as model
from utils.rpn_msr.proposal_layer import proposal_layer
from utils.text_connector.detectors import TextDetector
import pytesseract as tess
tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract' # set tesseract executable path as per your installation
tf.app.flags.DEFINE_string('test_data_path', 'data/demo/', '')
tf.app.flags.DEFINE_string('output_path', 'data/res/', '')
tf.app.flags.DEFINE_string('gpu', '0', '')
tf.app.flags.DEFINE_string('checkpoint_path', 'checkpoints_mlt/', '')
FLAGS = tf.app.flags.FLAGS
def get_images():
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(FLAGS.test_data_path):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    print('Find {} images'.format(len(files)))
    return files
def resize_image(img):
    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])
    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)
    new_h = int(img_size[0] * im_scale)
    new_w = int(img_size[1] * im_scale)
    # Round the dimensions up to a multiple of 16 (the CTPN anchor stride).
    new_h = new_h if new_h % 16 == 0 else (new_h // 16 + 1) * 16
    new_w = new_w if new_w % 16 == 0 else (new_w // 16 + 1) * 16
    re_im = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return re_im, (new_h / img_size[0], new_w / img_size[1])
def main(argv=None):
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]  # BGR -> RGB
                except:
                    print("Error reading image {}!".format(im_fn))
                    continue
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                boxes = np.array(boxes, dtype=np.int32)
                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))
                # Keep an unannotated copy at detection scale: the box coordinates
                # refer to the resized image, so the OCR crops must be taken before
                # the boxes are drawn and the image is scaled back.
                ocr_img = img.copy()
                for i, box in enumerate(boxes):
                    cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                                  thickness=2)
                # fx scales width (1 / rw) and fy scales height (1 / rh).
                img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh, interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])
                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for box in boxes:
                        x1, y1, x2, y2, x3, y3, x4, y4, threshold = box.tolist()
                        # Crop the axis-aligned bounding rectangle of the detected quad.
                        ymin = min(y1, y2, y3, y4)
                        ymax = max(y1, y2, y3, y4)
                        xmin = min(x1, x2, x3, x4)
                        xmax = max(x1, x2, x3, x4)
                        detected_text_img = ocr_img[int(ymin):int(ymax), int(xmin):int(xmax)].copy()
                        ocr_text = tess.image_to_string(detected_text_img, lang='eng', config='--psm 6 --dpi 66 --oem 3')
                        f.write(ocr_text + os.linesep)

if __name__ == '__main__':
    tf.app.run()
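Since the paths are defined with tf.app.flags, you can override the defaults from the command line when you run the script, for example:

python demo.py --test_data_path=data/demo/ --checkpoint_path=checkpoints_mlt/ --output_path=data/res/ --gpu=0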
Hi Max, can you tell me how to extract the words after running demo.py? Which file should I change to extract the words?
I have shared the code above. You also need to install Tesseract and pytesseract, and set the path to the Tesseract executable.
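A minimal sketch to verify the setup, assuming a Linux install at /usr/bin/tesseract as in the code above (on Windows, point tesseract_cmd at your tesseract.exe instead):

# Assumed setup; adjust for your system:
#   sudo apt-get install tesseract-ocr   # the Tesseract engine
#   pip install pytesseract              # the Python wrapper
import pytesseract as tess

tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract'  # assumed install path
print(tess.get_tesseract_version())  # fails loudly if the binary is not found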
Hi Max,
Thank you for the code, but did you actually get readable text back? I am getting all sorts of weird characters. Could this be due to the way you set up the OCR engine mode (--oem) and the page segmentation mode (--psm), i.e. config='--psm 6 --dpi 66 --oem 3'?
Tesseract doesn't automagically pick text out of any image. The image has to be clean, and it works better in grayscale.
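For example, a minimal preprocessing sketch along those lines (the helper name, the 2x upscale factor and the Otsu thresholding are illustrative choices, not part of the code above):

import cv2
import pytesseract as tess

def ocr_crop(crop_rgb):
    # Grayscale first (the demo.py crops are RGB because the image was reversed on load).
    gray = cv2.cvtColor(crop_rgb, cv2.COLOR_RGB2GRAY)
    # Upscale small crops; Tesseract prefers larger text.
    gray = cv2.resize(gray, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
    # Otsu binarization gives clean black-on-white text.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # --psm 7 treats the crop as a single text line, which fits CTPN boxes better
    # than --psm 6 (a uniform block of text).
    return tess.image_to_string(binary, lang='eng', config='--psm 7 --oem 3')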