Detect language in EasyOCR at runtime for multi-language image
EasyOCR can only be loaded with a set of mutually compatible languages: English is compatible with every language, but, for example, Spanish is not necessarily compatible with Japanese. Our situation is that a single image can contain text in multiple languages in different areas, with no guarantee that those languages form a mutually compatible set — and the combination of languages is not known until runtime; it can be any mix. How can EasyOCR be used to detect the languages in the various parts of the image and run OCR on each part separately? Loading EasyOCR with every language at once would cover all cases, but that tactic cannot be used precisely because not all languages are compatible with each other.
Example image:
The following approach works well: load one reader per mutually compatible language group, run them all over the image, and merge the results.
import easyocr
import concurrent.futures
from PIL import Image
import numpy as np
from easyocr.utils import get_paragraph
from easyocr.config import (
latin_lang_list,
arabic_lang_list,
cyrillic_lang_list,
)
# Language groups not exposed by easyocr.config.
bengali_lang_list = ['bn', 'as']
devanagari_lang_list = [
    'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom'
]
# Languages handled by a dedicated single-language reader each.
other_lang_list = ['th', 'ch_sim', 'ch_tra', 'ja', 'ko', 'te', 'kn']

# One Reader per mutually compatible language group, keyed by group name.
_grouped_langs = [
    ('latin_langs', latin_lang_list),
    ('arabic_langs', arabic_lang_list),
    ('bengali_langs', bengali_lang_list),
    ('cyrillic_langs', cyrillic_lang_list),
    ('devanagari_langs', devanagari_lang_list),
]
readers_map = {
    group_name: easyocr.Reader(langs, gpu=True)
    for group_name, langs in _grouped_langs
}
readers_map.update(
    (lang, easyocr.Reader([lang], gpu=True)) for lang in other_lang_list
)
def _bbox_extremes(box):
    """Return (min_x, min_y, max_x, max_y) of a box given as a list of points."""
    xs = [pt[0] for pt in box]
    ys = [pt[1] for pt in box]
    return min(xs), min(ys), max(xs), max(ys)


def boxes_are_close(box_a, box_b, eps=3):
    """Return True if two boxes occupy (nearly) the same area.

    Each box is a list of 4 points; the comparison is between the
    axis-aligned bounding rectangles of the two boxes.

    Args:
        box_a: first box, a list of [x, y] points.
        box_b: second box, a list of [x, y] points.
        eps: allowed difference, in pixels, for each of the four
            bounding-rectangle extremes.

    Returns:
        bool: True when every extreme differs by at most ``eps``.
    """
    return all(
        abs(a - b) <= eps
        for a, b in zip(_bbox_extremes(box_a), _bbox_extremes(box_b))
    )
def recognize_text_from_image(img: Image.Image,
                              x_ths=1.0,
                              y_ths=0.5,
                              run_parallel=True,
                              dedup_eps=0):
    """Recognize text with every reader in ``readers_map`` and merge results.

    If ``run_parallel`` is True, all readers run concurrently in threads;
    otherwise they run sequentially (lower peak GPU usage).

    Pipeline:
        1) Collect all results (detail=1) as [[box, text, conf, reader_name], ...].
        2) Group results whose bounding boxes are within ``dedup_eps`` pixels
           of each other and keep the highest-confidence reading per box.
        3) Split results into 'arabic_langs' vs. everything else.
        4) Call get_paragraph(...) per group with mode='rtl' for arabic
           and mode='ltr' otherwise.
        5) Return the paragraphs (arabic first, then the rest), each
           as [box, text, conf].

    Args:
        img: source image.
        x_ths, y_ths: thresholds forwarded to easyocr's get_paragraph.
        run_parallel: run readers concurrently when True.
        dedup_eps: pixel tolerance used when collapsing duplicate boxes
            produced by different readers (0 = exact-coordinate match only).

    Returns:
        list: paragraphs, each [box, text, conf].
    """
    base_np_img = np.array(img.convert("RGB"))

    def run_reader(reader, name):
        """Run one reader; return [[box, text, conf, name], ...]."""
        return [[box, txt, cf, name]
                for (box, txt, cf) in reader.readtext(base_np_img, detail=1)]

    # Step 1: collect raw results from every reader.
    raw_all = []
    if run_parallel:
        # Threads are appropriate here: readtext releases the GIL during
        # the heavy native work.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_map = {
                executor.submit(run_reader, rdr, rname): rname
                for rname, rdr in readers_map.items()
            }
            for future in concurrent.futures.as_completed(future_map):
                reader_name = future_map[future]
                try:
                    raw_all.extend(future.result())
                except Exception as e:
                    print(f'Ошибка в ридере "{reader_name}": {e}')
    else:
        for rname, rdr in readers_map.items():
            try:
                raw_all.extend(run_reader(rdr, rname))
            except Exception as e:
                print(f'Ошибка в ридере "{rname}": {e}')

    # Step 2: collapse boxes with (near-)identical coordinates, keeping the
    # highest-confidence reading for each cluster of duplicates.
    grouped = []
    used = [False] * len(raw_all)
    for i, item_i in enumerate(raw_all):
        if used[i]:
            continue
        box_i = item_i[0]
        cluster = [i] + [
            j for j in range(i + 1, len(raw_all))
            if not used[j] and boxes_are_close(box_i, raw_all[j][0], eps=dedup_eps)
        ]
        # max() keeps the first entry on ties, matching "strictly greater" logic.
        best_idx = max(cluster, key=lambda k: raw_all[k][2])
        grouped.append(raw_all[best_idx])
        for k in cluster:
            used[k] = True

    # Step 3: split into arabic (RTL) vs. everything else (LTR).
    arabic_list = []
    other_list = []
    for (box, txt, cf, rdr_name) in grouped:
        target = arabic_list if rdr_name == 'arabic_langs' else other_list
        target.append([box, txt, cf])

    # Step 4: merge boxes into paragraphs, per text direction.
    arabic_paragraphs = (
        get_paragraph(arabic_list, x_ths=x_ths, y_ths=y_ths, mode='rtl')
        if arabic_list else []
    )
    other_paragraphs = (
        get_paragraph(other_list, x_ths=x_ths, y_ths=y_ths, mode='ltr')
        if other_list else []
    )

    # Step 5: arabic paragraphs first, then everything else.
    return arabic_paragraphs + other_paragraphs
if __name__ == '__main__':
    # Smoke test: run the full pipeline on a sample image and dump paragraphs.
    image_path = '/home/gordey/Pictures/test-photos/cover6_1438789097-576x288.png'
    sample_image = Image.open(image_path)
    paragraphs = recognize_text_from_image(sample_image, x_ths=1.0, y_ths=0.5)
    for paragraph in paragraphs:
        print(paragraph)  # [box, text, conf]