Transformers-Tutorials icon indicating copy to clipboard operation
Transformers-Tutorials copied to clipboard

paddleocr in tabletransformer error

Open tzktz opened this issue 1 year ago • 0 comments

hi @NielsRogge , i have use paddleocr instead of easyocr in table transfomer nd i have an issue can u resolve it !!!

# load paddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')

def apply_ocr(cell_coordinates, cropped_table):
    # let's OCR row by row
    data = dict()
    max_num_columns = 0
    for idx, row in enumerate(cell_coordinates):
        row_text = []
        for cell in row["cells"]:
            # crop cell out of image
            cell_image = np.array(cropped_table.crop(cell["cell"]))
            # apply OCR
            result = ocr.ocr(np.array(cell_image), cls=True)
            if len(result) > 0:
                text = " ".join([x[1] for x in result])
                row_text.append(text)

        if len(row_text) > max_num_columns:
            max_num_columns = len(row_text)

        data[str(idx)] = row_text

    # pad rows which don't have max_num_columns elements
    # to make sure all rows have the same number of columns
    for idx, row_data in data.copy().items():
        if len(row_data) != max_num_columns:
            row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
        data[str(idx)] = row_data

    # write to csv
    with open('output.csv', 'w') as result_file:
        wr = csv.writer(result_file, dialect='excel')

        for row, row_text in data.items():
            wr.writerow(row_text)

    # return as Pandas dataframe
    df = pd.read_csv('output.csv')

    return df, data

The error message i got is..

  text = " ".join([x[1] for x in result])
IndexError: list index out of range

tzktz avatar Jan 29 '24 05:01 tzktz