
how to apply OCR?

Open baronpalacios opened this issue 5 years ago • 9 comments

Hi, I want to know how to extract the detected text from text-detection-ctpn.

baronpalacios avatar Mar 21 '19 08:03 baronpalacios

https://github.com/eragonruan/text-detection-ctpn/issues/120#issuecomment-488273168

kspook avatar May 01 '19 12:05 kspook

@baronpalacios Did you figure out how to apply OCR?

kalai2033 avatar Aug 12 '19 14:08 kalai2033

Applying OCR is quite easy: demo.py already writes out every detected rectangle with its coordinates. What you need to do is crop the image using those coordinates and then run Tesseract on each crop for the OCR part.
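
For example, a minimal sketch of that idea (assuming boxes and img come from demo.py as in the snippets below, and that pytesseract and the Tesseract binary are installed):

import pytesseract

for box in boxes:
    # each box holds 8 polygon coordinates plus a score; take its bounding rectangle
    xs, ys = box[0:8:2], box[1:8:2]
    crop = img[int(min(ys)):int(max(ys)), int(min(xs)):int(max(xs))]
    print(pytesseract.image_to_string(crop))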

cloudhuang avatar Sep 05 '19 03:09 cloudhuang

I replaced the write-to-text-file part with Tesseract, but Tesseract sucks big time. Here is the code:

import pytesseract as tess
tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
    for i, box in enumerate(boxes):
        box = boxes[i].tolist()
        x1, y1, x2, y2, x3, y3, x4, y4, threshold = box
        allys = [y1, y2, y3, y4]
        ymin = min(allys)
        ymax = max(allys)
        allx = [x1, x2, x3, x4]
        xmin = min(allx)
        xmax = max(allx)
        detected_text_img = img[int(ymin):int(ymax), int(xmin):int(xmax)].copy()
        ocr_text = tess.image_to_string(detected_text_img)
        f.writelines(ocr_text + os.linesep)
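
Note that the snippet assumes boxes, img, FLAGS and im_fn from demo.py are still in scope, and that img is still the resized image the boxes were detected on; if you crop after the image has been rescaled and annotated, the crops will be misaligned.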

maxpaynestory avatar Mar 25 '20 10:03 maxpaynestory

hi max, can you tell me how to extract the words after running demo.py? which file should I change to extract the words?

sevany avatar Apr 17 '20 07:04 sevany

Here is the full source code of my demo.py

# coding=utf-8
import os
import shutil
import sys
import time

import cv2
import numpy as np
import tensorflow as tf

sys.path.append(os.getcwd())
from nets import model_train as model
from utils.rpn_msr.proposal_layer import proposal_layer
from utils.text_connector.detectors import TextDetector

import pytesseract as tess
tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract' # set tesseract executable path as per your installation

tf.app.flags.DEFINE_string('test_data_path', 'data/demo/', '')
tf.app.flags.DEFINE_string('output_path', 'data/res/', '')
tf.app.flags.DEFINE_string('gpu', '0', '')
tf.app.flags.DEFINE_string('checkpoint_path', 'checkpoints_mlt/', '')
FLAGS = tf.app.flags.FLAGS


def get_images():
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(FLAGS.test_data_path):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    print('Find {} images'.format(len(files)))
    return files


def resize_image(img):
    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])

    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)
    new_h = int(img_size[0] * im_scale)
    new_w = int(img_size[1] * im_scale)

    # pad the resized dimensions up to a multiple of 16 (the network stride)
    new_h = new_h if new_h % 16 == 0 else (new_h // 16 + 1) * 16
    new_w = new_w if new_w % 16 == 0 else (new_w // 16 + 1) * 16

    re_im = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return re_im, (new_h / img_size[0], new_w / img_size[1])


def main(argv=None):
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
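                # run the CTPN network: box regressions and text/non-text probabilities for the resized image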
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                boxes = np.array(boxes, dtype=np.int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # keep a clean copy of the resized image for OCR; the detected boxes
                # are in this image's coordinate space
                ocr_img = img.copy()
                for i, box in enumerate(boxes):
                    cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                                  thickness=2)
                img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh, interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for i, box in enumerate(boxes):
                        box = boxes[i].tolist()
                        x1, y1, x2, y2, x3, y3, x4, y4, threshold = box
                        allys = [y1, y2, y3, y4]
                        ymin = min(allys)
                        ymax = max(allys)
                        allx = [x1, x2, x3, x4]
                        xmin = min(allx)
                        xmax = max(allx)
                        # crop from the clean resized copy (ocr_img); the box coordinates
                        # are in that image's space, not the rescaled, annotated output image
                        detected_text_img = ocr_img[int(ymin):int(ymax), int(xmin):int(xmax)].copy()
                        ocr_text = tess.image_to_string(detected_text_img, lang='eng', config='--psm 6 --dpi 66 --oem 3')
                        f.writelines(ocr_text + os.linesep)


if __name__ == '__main__':
    tf.app.run()

maxpaynestory avatar Apr 17 '20 08:04 maxpaynestory

> hi max, can you tell me how to extract the words after running demo.py? which file should I change to extract the words?

I have shared the code above. You also need to install Tesseract and pytesseract, and set the path to the tesseract executable.
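
For example (the install commands and the Windows path below are only illustrative; adjust them to your system):

# pip install pytesseract                  (Python wrapper)
# plus the Tesseract engine itself, e.g. apt-get install tesseract-ocr on Linux
import pytesseract as tess

tess.pytesseract.tesseract_cmd = '/usr/bin/tesseract'  # Linux
# tess.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Windows (example path)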

maxpaynestory avatar Apr 17 '20 08:04 maxpaynestory

> hi max, can you tell me how to extract the words after running demo.py? which file should I change to extract the words?

> I have shared the code above. You also need to install Tesseract and pytesseract, and set the path to the tesseract executable.

Hi Max,

Thank you for the code, but did you get readable text back? I am getting all sorts of weird characters. Could this be due to the way you set up the OCR engine mode (--oem) and the page segmentation mode (--psm), i.e. config='--psm 6 --dpi 66 --oem 3'?

ayouceff avatar Sep 07 '20 09:09 ayouceff

Tesseract doesn't automagically pick text out of any image. The image has to be clear, and ideally grayscale.
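
For instance, a minimal preprocessing sketch along those lines (grayscale plus Otsu binarization before OCR; the page-segmentation mode is just an example):

import cv2
import pytesseract

def ocr_crop(crop_rgb):
    # convert the cropped text region to grayscale and binarize it before OCR
    gray = cv2.cvtColor(crop_rgb, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # --psm 7: treat the crop as a single line of text
    return pytesseract.image_to_string(binary, config='--psm 7')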

maxpaynestory avatar Sep 07 '20 10:09 maxpaynestory