text_extraction
text_extraction copied to clipboard
Sometimes regions are detected which are invalid and have wrong data
Sometimes regions are detected that are not text, and when they are drawn their pixel width is wider than their bounding-box width. The following adjustment to units.h detects this mismatch and prevents such regions from being drawn, but I don't know the root cause of the bug.
// Computes the horizontal extent, in pixels, spanned by a set of linear pixel
// indices.
//
// Each entry of `pixels` is treated as a row-major index into an image that is
// `imageWidth` pixels wide, so its column is `pixel % imageWidth`.
//
// Parameters:
//   imageWidth  - width of the image the indices refer to; must be positive.
//   imageHeight - unused; kept so the signature stays unchanged for callers.
//   pixels      - linear pixel indices (not modified).
//
// Returns (rightmost column - leftmost column + 1), or 0 when `pixels` is
// empty or `imageWidth` is not positive.
int widthFromPixels(int imageWidth, int imageHeight, std::vector<int>& pixels) {
    (void)imageHeight;

    // An empty set has no extent, and a non-positive width would make the
    // modulo below undefined.
    if (imageWidth <= 0 || pixels.empty()) {
        return 0;
    }

    // Seed both bounds from the first pixel instead of a -1 sentinel so the
    // result does not depend on the sign of the computed column.
    int left = pixels[0] % imageWidth;
    int right = left;
    for (std::vector<int>::size_type p = 1; p < pixels.size(); ++p) {
        const int x = pixels[p] % imageWidth;
        if (x < left) {
            left = x;
        }
        if (x > right) {
            right = x;
        }
    }
    return right - left + 1;
}
// Paints the pixels of every region in every cluster onto `img`, using one
// colour per cluster taken from `bcolors` (cycling through its first 9 rows).
//
// Parameters:
//   img                 - image painted in place through its raw data pointer.
//                         NOTE(review): the writes assume 3 bytes per pixel and
//                         a row stride of img.cols pixels (continuous storage)
//                         - confirm against callers.
//   regions             - all detected regions; indexed by the values stored in
//                         `meaningful_clusters`.
//   meaningful_clusters - one vector of region indices per cluster.
//
// A region is skipped when the pixel width derived from its pixel list differs
// from its bounding-box width. The root cause of such mismatching regions is
// unknown; the check only keeps them from being drawn.
void drawClusters(Mat& img, vector<Region> *regions, vector<vector<int> > *meaningful_clusters)
{
    uchar* rsptr = (uchar*)img.data;
    // Number of addressable pixels; used to reject indices that would write
    // outside the image buffer.
    const int pixelCount = img.rows * img.cols;

    const int clusterCount = static_cast<int>(meaningful_clusters->size());
    for (int i = 0; i < clusterCount; i++)
    {
        const vector<int>& cluster = meaningful_clusters->at(i);
        const int regionCount = static_cast<int>(cluster.size());
        for (int c = 0; c < regionCount; c++)
        {
            // Look the region up once instead of on every pixel access.
            Region& region = regions->at(cluster.at(c));

            const int pixelWidth = widthFromPixels(img.cols, img.rows, region.pixels_);
            const int boundingBoxWidth = region.bbox_.width;
            if (pixelWidth != boundingBoxWidth)
            {
                continue;  // inconsistent region: do not draw it
            }

            const int regionPixelCount = static_cast<int>(region.pixels_.size());
            for (int p = 0; p < regionPixelCount; p++)
            {
                const int pixel = region.pixels_.at(p);
                if (pixel < 0 || pixel >= pixelCount)
                {
                    continue;  // index outside the image: skip rather than corrupt memory
                }
                // bcolors channels are written in reverse order (2, 1, 0).
                uchar* dst = rsptr + pixel * 3;
                dst[0] = bcolors[i%9][2];
                dst[1] = bcolors[i%9][1];
                dst[2] = bcolors[i%9][0];
            }
        }
    }
}