tesseract.js icon indicating copy to clipboard operation
tesseract.js copied to clipboard

Querying from multiple processes

Open herecy opened this issue 3 years ago • 2 comments

It seems that when tesseract.js is queried from multiple threads at the same time, the eng.traineddata file gets locked and errors like the following are thrown:

Error opening data file ./eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language 'eng' Error opening data file ./eng.traineddata Tesseract couldn't load any languages!

To reproduce this:

const { Worker } = require('worker_threads')

/**
 * Starts `./databaseTests.js` in a new worker thread and wraps its outcome
 * in a promise.
 *
 * @param {*} workerData - Value handed to the worker thread as its `workerData`.
 * @returns {Promise<*>} Resolves with the first message the worker posts;
 *   rejects with the worker's error, or with an Error naming the exit code
 *   when the worker exits with a non-zero code.
 */
function runService(workerData) {
  return new Promise((resolve, reject) => {
    const thread = new Worker('./databaseTests.js', { workerData });

    // A clean exit (code 0) is not treated as a failure.
    const handleExit = (exitCode) => {
      if (exitCode === 0) {
        return;
      }
      reject(new Error(`Worker stopped with exit code ${exitCode}`));
    };

    thread.on('message', resolve);
    thread.on('error', reject);
    thread.on('exit', handleExit);
  });
}

/**
 * Runs one worker round-trip with the payload 'world' and prints whatever
 * the worker sends back. Rejections from runService propagate to the caller.
 */
async function run() {
  const reply = await runService('world');
  console.log(reply);
}

// Reproduction driver: starts a new run roughly every 5 ms without waiting
// for earlier runs to finish, so many workers are alive at the same time.
(async () => {
    const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

    for (;;) {
        // Deliberately not awaited; failures are only logged.
        run().catch((failure) => console.error(failure));
        await pause(5);
    }
})();

In databaseTests.js, I am simply creating a new tesseract.js worker:

const { createWorker } = require('tesseract.js');
const path = require('path');

// OCR worker configured to read language data from ../lang-data (relative to
// this script) and to print every progress/log message it reports.
const worker = createWorker({
  langPath: path.join(__dirname, '..', 'lang-data'),
  logger: m => console.log(m),
});

// Loads the English model, recognizes ../images/testocr.png and prints the
// extracted text.
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(path.join(__dirname, '..', 'images', 'testocr.png'));
    console.log(text);
  } finally {
    // Always release the OCR worker, even when loading the language data or
    // recognition fails; previously a failure skipped terminate() entirely.
    // The original error still propagates after cleanup.
    await worker.terminate();
  }
})();

Is there any way to reuse the worker or to skip checking the trained data file?

herecy avatar Nov 12 '21 12:11 herecy

Likely duplicate of #576. Will look into it.

Balearica avatar Sep 18 '22 04:09 Balearica

@herecy I was able to reproduce this bug in version 2, but not the current version (v3). If this issue is still active, please confirm you still encounter this bug in the latest version and provide a reproducible example.

Balearica avatar Sep 18 '22 06:09 Balearica

Closing this issue as we implemented multiple changes that should greatly reduce or eliminate errors with cached .traineddata files in v3 and v4. If you experience this type of error using v4, please open a new issue.

Balearica avatar Nov 25 '22 20:11 Balearica