Transformers-Tutorials
Transformers-Tutorials copied to clipboard
paddleocr in tabletransformer error
hi @NielsRogge , i have use paddleocr instead of easyocr in table transfomer nd i have an issue can u resolve it !!!
# load paddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def apply_ocr(cell_coordinates, cropped_table):
# let's OCR row by row
data = dict()
max_num_columns = 0
for idx, row in enumerate(cell_coordinates):
row_text = []
for cell in row["cells"]:
# crop cell out of image
cell_image = np.array(cropped_table.crop(cell["cell"]))
# apply OCR
result = ocr.ocr(np.array(cell_image), cls=True)
if len(result) > 0:
text = " ".join([x[1] for x in result])
row_text.append(text)
if len(row_text) > max_num_columns:
max_num_columns = len(row_text)
data[str(idx)] = row_text
# pad rows which don't have max_num_columns elements
# to make sure all rows have the same number of columns
for idx, row_data in data.copy().items():
if len(row_data) != max_num_columns:
row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
data[str(idx)] = row_data
# write to csv
with open('output.csv', 'w') as result_file:
wr = csv.writer(result_file, dialect='excel')
for row, row_text in data.items():
wr.writerow(row_text)
# return as Pandas dataframe
df = pd.read_csv('output.csv')
return df, data
The error message i got is..
text = " ".join([x[1] for x in result])
IndexError: list index out of range