codelab-mlkit-android
codelab-mlkit-android copied to clipboard
Text Recognition Text Bounding Box Off
Android Studio Version: Android Studio Flamingo | 2022.2.1 Patch 1
Problem: The Text Recognition bounding box is off.
This is best illustrated by this Stack Overflow post, whose solution doesn't solve the problem for me: https://stackoverflow.com/questions/66624836/android-locating-words-on-the-screen-google-ml-kit-bounding-boxes-are-off-a-bi
Reference image from the Stack Overflow post (please note that my code doesn't draw boxes with this styling): https://i.stack.imgur.com/idVzM.jpg
I tried to follow the ML Kit codelab (https://codelabs.developers.google.com/codelabs/mlkit-android#4), but it doesn't run.
--
What I'm doing:
I am passing a bitmap selected from the photo gallery to the text recognizer, and the bounding boxes it returns are not positioned precisely.
Code found below
--
build.gradle.kts
// Gradle plugins applied to this Android application module.
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
    kotlin("plugin.serialization")
}

// Version components; versionCode and versionName in defaultConfig are derived from these.
val versionMajor: Int = 1
val versionMinor: Int = 0
val versionPatch: Int = 1

// Bump for dogfood builds, public betas, etc.
val versionBuild: Int = 1
// Android Gradle plugin configuration for the application module.
android {
    namespace = "--"
    compileSdk = 33

    defaultConfig {
        applicationId = "--"
        minSdk = 30
        targetSdk = 33

        // Single integer code built from the version components declared above.
        versionCode = versionMajor * 10000 + versionMinor * 1000 + versionPatch * 100 + versionBuild
        // Human-readable "major.minor.patch" string.
        versionName = "$versionMajor.$versionMinor.$versionPatch"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            // Code shrinking, obfuscation, and optimization are turned on
            // for the release build type only.
            isMinifyEnabled = true

            // Resource shrinking is performed by the Android Gradle plugin.
            isShrinkResources = true

            // Default ProGuard rules shipped with the Android Gradle plugin,
            // followed by this project's own rules file.
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro",
            )
        }
    }

    // Java and Kotlin both target Java 8 bytecode.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }

    // Jetpack Compose.
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.4.5"
    }

    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}
// CameraX
// All CameraX artifacts share this one version. Previously camera-camera2 and
// camera-lifecycle were pinned to 1.0.1 while camera-view was on 1.3.0-alpha06,
// which mixes incompatible CameraX releases in one build.
val cameraxVersion = "1.3.0-alpha06"

dependencies {
    // AndroidX core, lifecycle, activity, and fragment
    implementation("androidx.core:core-ktx:1.10.1")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.1")
    implementation("androidx.activity:activity-compose:1.7.1")
    implementation("androidx.fragment:fragment-ktx:1.5.7")

    // Jetpack Compose (versions are supplied by the BOM)
    implementation(platform("androidx.compose:compose-bom:2022.10.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")

    // Testing
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2022.10.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")

    // Serialization
    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0")
    implementation("com.google.code.gson:gson:2.8.9")

    // ConstraintLayout for Compose
    implementation("androidx.constraintlayout:constraintlayout-compose:1.0.1")

    // CAMERA
    implementation("androidx.camera:camera-camera2:$cameraxVersion")
    implementation("androidx.camera:camera-lifecycle:$cameraxVersion")
    implementation("androidx.camera:camera-view:$cameraxVersion")

    // Datastore
    // NOTE(review): both the RxJava2 and RxJava3 bindings are included; confirm both are needed.
    implementation("androidx.datastore:datastore-preferences:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava2:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava3:1.0.0")

    // MLKit
    implementation("com.google.android.gms:play-services-mlkit-text-recognition:19.0.0")
}
--
Algorithm:
/**
 * Runs ML Kit text recognition on [image] and reports, through `onProcessMedia`, the
 * recognized text blocks whose text is accepted by `isPrice`.
 *
 * `onProcessMedia` is invoked once when the recognition task completes: with the matching
 * blocks on success, or with an empty array on failure. If `context?.display` is
 * unavailable the function returns immediately and no callback is made.
 *
 * @param image bitmap to analyze; it is handed to ML Kit with a rotation of 0 degrees.
 */
@SuppressLint("UnsafeOptInUsageError")
fun TextRecognitionCoordinator.analyzeMedia(image: Bitmap) {
    // The rotation value itself is not needed (the bitmap is processed with rotation 0);
    // the lookup is kept only as the original "no display available" guard.
    // NOTE(review): confirm whether skipping analysis without a callback is intended here.
    context?.display?.rotation ?: return

    // NOTE(review): this scope is not tied to any lifecycle, and the recognizer task is
    // already asynchronous; consider using a lifecycle-bound scope or dropping the launch.
    CoroutineScope(Dispatchers.IO).launch {
        val inputImage = InputImage.fromBitmap(image, 0)
        var elementArray: Array<Text.TextBlock> = arrayOf()

        // Pass image to an ML Kit Vision API
        recognizer.process(inputImage)
            .addOnSuccessListener { visionText ->
                // Task completed successfully
                if (TextRecognitionCoordinator.debug) {
                    Log.i(
                        "${TextRecognitionCoordinator.identifier}",
                        "${DebuggingIdentifiers.actionOrEventSucceded} processImage | processed recognition"
                    )
                }

                // Keep only the blocks that look like a price.
                val priceBlocks = visionText.textBlocks.filter { block -> isPrice(block.text) }
                if (TextRecognitionCoordinator.debug) {
                    priceBlocks.forEach { block ->
                        Log.i(
                            "${TextRecognitionCoordinator.identifier}",
                            "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | text: ${block.text}"
                        )
                    }
                }
                elementArray = priceBlocks.toTypedArray()

                if (TextRecognitionCoordinator.debug) {
                    Log.i(
                        "${TextRecognitionCoordinator.identifier}",
                        "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | closing with count : ${elementArray.count()}"
                    )
                }
            }
            .addOnFailureListener { e ->
                // Task failed with an exception. Only log here: the complete listener below
                // also runs on failure and delivers the (empty) result, so invoking the
                // callback here as well would notify the caller twice.
                Log.e(
                    "${TextRecognitionCoordinator.identifier}",
                    "${DebuggingIdentifiers.actionOrEventFailed} analyzeMedia | Failed to recognize text. Error: $e"
                )
            }
            .addOnCompleteListener { _ ->
                // Runs after success and after failure: return the collected blocks.
                onProcessMedia?.invoke(elementArray)
            }
    }
}
--
Composable that's drawn on a full screen card
/**
 * Draws a red box over the area of a recognized text block.
 *
 * The block's bounding box is scaled by the screen-to-image ratio on each axis and then
 * positioned relative to the parent's top and absolute-left edges. Nothing is drawn when
 * the block has no bounding box.
 *
 * @param element recognized text block whose bounding box should be highlighted.
 */
@Composable
fun createBoundingBox(element: Text.TextBlock) {
    val boundingBox = element.boundingBox ?: return
    Log.i(
        "${identifier}",
        "${DebuggingIdentifiers.actionOrEventSucceded} bounding box ${boundingBox} | top ${boundingBox.top} | left ${boundingBox.left}."
    )

    // Note that screen width and screen height are the device's width and height.
    // Force floating-point division: if both operands are integers, the ratio would be
    // truncated (e.g. 1080 / 4000 == 0), which shifts or collapses every box.
    val wRatio = screenWidth.toFloat() / imageWidth
    val hRatio = screenHeight.toFloat() / imageHeight

    // NOTE(review): the bounding box is expressed in image pixels, while the values below
    // are applied as dp. If screenWidth/screenHeight are in pixels, the scaled values are
    // pixels too and need a density conversion before use - confirm the units.
    // NOTE(review): scaling each axis independently assumes the image is stretched to fill
    // the whole screen; a fitted or cropped image needs a single scale plus an offset.
    val elementTop = boundingBox.top * hRatio
    val elementLeft = boundingBox.left * wRatio
    val elementWidth = boundingBox.width() * wRatio
    val elementHeight = boundingBox.height() * hRatio

    Box(
        modifier = Modifier
            .constrainAs(createRef()) {
                top.linkTo(
                    parent.top,
                    elementTop.dp
                )
                absoluteLeft.linkTo(
                    parent.absoluteLeft,
                    elementLeft.dp
                )
                width = Dimension.value(elementWidth.dp)
                height = Dimension.value(elementHeight.dp)
            }
            .background(Color.Red),
        content = {}
    )
}