Pytesser icon indicating copy to clipboard operation
Pytesser copied to clipboard

pytesser.TesseractNotFound

Open PGillner opened this issue 6 years ago • 1 comments

I keep getting the error:

Traceback (most recent call last):
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\test.py", line 6, in <module>
    txt = pytesser.image_to_string(image)
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 60, in image_to_string
    txt = image_file_to_string(TEMP_IMAGE, lang, psm)
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 65, in image_file_to_string
    check_path() #Check if tesseract available in the path
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 35, in check_path
    raise TesseractNotFound()
pytesser.TesseractNotFound

But I'm really unsure why.

pytesser.py

import sys
from subprocess import Popen, PIPE
import os
import tempfile
import cv2

# Name of the tesseract executable: check_path() looks for it in the PATH
# directories and process_request() launches it.
PROG_NAME = 'tesseract.exe'
# Scratch .bmp path that the grayscale image is written to before OCR.
# The name is generated once, at import time, and shared by every call.
# NOTE(review): tempfile.mktemp() only returns a name and is documented as
# deprecated because another process can create that file first.
TEMP_IMAGE = tempfile.mktemp()+'.bmp'
# Output base name handed to tesseract; the recognised text is read back from
# TEMP_FILE + ".txt". Also generated once at import time.
TEMP_FILE = tempfile.mktemp()

# Symbolic names for the values accepted as the `psm` argument (forwarded to
# tesseract's -psm option), so callers do not have to remember the numbers.
(
    PSM_OSD_ONLY,            # 0
    PSM_SEG_AND_OSD,         # 1
    PSM_SEG_ONLY,            # 2
    PSM_AUTO,                # 3
    PSM_SINGLE_COLUMN,       # 4
    PSM_VERTICAL_ALIGN,      # 5
    PSM_UNIFORM_BLOCK,       # 6
    PSM_SINGLE_LINE,         # 7
    PSM_SINGLE_WORD,         # 8
    PSM_SINGLE_WORD_CIRCLE,  # 9
    PSM_SINGLE_CHAR,         # 10
) = range(11)

class TesseractException(Exception):
    """Raised when the tesseract process exits with a non-zero status."""

class TesseractNotFound(Exception):
    """Raised when the tesseract executable is not found in the PATH."""

def check_path():
    """Verify that the tesseract executable is reachable through PATH.

    Each directory listed in the PATH environment variable is probed for an
    existing, non-directory entry named PROG_NAME.

    Returns:
        True as soon as a matching entry is found.

    Raises:
        TesseractNotFound: if no PATH directory contains PROG_NAME.
    """
    # PATH entries are separated by os.pathsep (';' on Windows, ':' on POSIX).
    # Splitting on a hard-coded ':' cut Windows entries such as
    # C:\Program Files\Tesseract-OCR at the drive colon, so the executable
    # could never be found there.
    for path in os.environ.get('PATH', '').split(os.pathsep):
        filepath = os.path.join(path, PROG_NAME)
        if os.path.exists(filepath) and not os.path.isdir(filepath):
            return True
    raise TesseractNotFound()

def process_request(input_file, output_file, lang=None, psm=None):
    """Run tesseract on input_file and wait for it to finish.

    Args:
        input_file: path of the image handed to tesseract.
        output_file: output base name handed to tesseract.
        lang: optional language code, forwarded with "-l".
        psm: optional segmentation mode, forwarded with "-psm".

    Raises:
        TesseractException: if the process exits with a non-zero status.
            The message is "File not found" for exit code 2, the stderr
            output prefixed with "Language code invalid: " for -11, and the
            raw stderr output otherwise.
    """
    args = [PROG_NAME, input_file, output_file] #Create the arguments
    if lang is not None:
        args.extend(["-l", lang])
    if psm is not None:
        # NOTE(review): recent tesseract releases document this flag as
        # "--psm"; confirm against the installed version.
        args.extend(["-psm", str(psm)])
    proc = Popen(args, stdout=PIPE, stderr=PIPE) #Open process
    _, err = proc.communicate() #Blocks until the process has exited

    code = proc.returncode
    if code == 0:
        return

    # communicate() returns bytes for pipes opened in binary mode; decode so
    # that building the message below cannot fail with a str + bytes
    # TypeError and the exception carries readable text.
    if isinstance(err, bytes):
        err = err.decode("utf-8", "replace")
    if code == 2:
        raise TesseractException("File not found")
    if code == -11:
        raise TesseractException("Language code invalid: "+err)
    raise TesseractException(err)

def image_to_string(im, lang=None, psm=None):
    """Run OCR on an in-memory image and return the recognised text.

    The image is converted to grayscale, written to TEMP_IMAGE and handed to
    image_file_to_string().

    Args:
        im: image accepted by cv2.cvtColor with COLOR_BGR2GRAY
            (presumably a BGR array as returned by cv2.imread -- confirm).
        lang: optional language code forwarded to tesseract.
        psm: optional segmentation mode forwarded to tesseract.

    Returns:
        The text returned by image_file_to_string().
    """
    grayscale_image = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    cv2.imwrite(TEMP_IMAGE, grayscale_image)
    try:
        return image_file_to_string(TEMP_IMAGE, lang, psm)
    finally:
        # TEMP_IMAGE may already have been deleted by image_file_to_string();
        # an unconditional os.remove() then raised after a successful run.
        # The finally clause also cleans up when OCR fails.
        if os.path.exists(TEMP_IMAGE):
            os.remove(TEMP_IMAGE)

def image_file_to_string(file, lang=None, psm=None):
    """Run OCR on an image file and return the recognised text.

    The file is loaded with cv2, converted to grayscale and written to
    TEMP_IMAGE; tesseract is then run on that copy and its output is read
    back from TEMP_FILE + ".txt".

    Args:
        file: path of the image to recognise.
        lang: optional language code forwarded to tesseract.
        psm: optional segmentation mode forwarded to tesseract.

    Returns:
        The content of the text file produced by tesseract.

    Raises:
        TesseractNotFound: if check_path() cannot find the executable.
        TesseractException: if tesseract exits with a non-zero status.
    """
    check_path() #Check if tesseract available in the path
    grayscale_image = cv2.cvtColor(cv2.imread(file), cv2.COLOR_BGR2GRAY)
    cv2.imwrite(TEMP_IMAGE, grayscale_image)
    result_path = TEMP_FILE+".txt"
    try:
        process_request(TEMP_IMAGE, TEMP_FILE, lang, psm)
        with open(result_path, "r") as f: #Open back the file
            return f.read()
    finally:
        # Clean up even when tesseract fails, so scratch files do not pile up.
        if os.path.exists(result_path):
            os.remove(result_path)
        # When the caller passed TEMP_IMAGE itself as input, the file belongs
        # to the caller: leave its removal to them instead of deleting their
        # input from under them.
        if file != TEMP_IMAGE and os.path.exists(TEMP_IMAGE):
            os.remove(TEMP_IMAGE)


if __name__ == '__main__':
    # Command-line use: the first argument is the language code, the second
    # is the image file to recognise.
    lang_arg, image_arg = sys.argv[1], sys.argv[2]
    recognised = image_file_to_string(image_arg, lang_arg, PSM_AUTO)
    print(recognised)

PGillner avatar Mar 21 '18 10:03 PGillner

Is the tesseract.exe executable in your PATH? It has to be reachable so that subprocess can launch it. pytesser is just a simple wrapper around this executable.

RobinDavid avatar Mar 21 '18 10:03 RobinDavid