@ -7,6 +7,9 @@ import android.util.Size
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.text.TextRecognition
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions
import com.google.mlkit.vision.text.japanese.JapaneseTextRecognizerOptions
import com.google.mlkit.vision.text.korean.KoreanTextRecognizerOptions
import com.quillstudios.pokegoalshelper.ml.Detection
import com.quillstudios.pokegoalshelper.PokemonInfo
import com.quillstudios.pokegoalshelper.PokemonStats
@ -34,26 +37,45 @@ class PokemonDataExtractorImpl(
// OCR Configuration
private const val DEFAULT_TIMEOUT_SECONDS = 10L
private const val INDIVIDUAL_OCR_TIMEOUT_SECONDS = 5L
private const val BBOX_EXPANSION_FACTOR = 0.05f // 5% expansion for better OCR
private const val BBOX_EXPANSION_FACTOR = 0.1f // Increased to 10% for better text capture
// OCR Image Processing Constants
private const val MIN_OCR_WIDTH = 50
private const val MIN_OCR_HEIGHT = 50
private const val GAUSSIAN_BLUR_KERNEL_SIZE = 3.0
private const val GAUSSIAN_BLUR_SIGMA = 0.5
private const val CLAHE_CLIP_LIMIT = 1.5
private const val CLAHE_TILE_SIZE = 8.0
private const val MIN_OCR_WIDTH = 64
private const val MIN_OCR_HEIGHT = 32
private const val TARGET_OCR_HEIGHT = 64 // Optimal height for ML Kit
// Multi-scale processing
private const val SCALE_FACTOR_1 = 2.0 // 2x upscale
private const val SCALE_FACTOR_2 = 3.0 // 3x upscale for small text
// Contrast enhancement
private const val CLAHE_CLIP_LIMIT = 2.0 // Increased for better contrast
private const val CLAHE_TILE_SIZE = 4.0 // Smaller tiles for finer control
// Noise reduction
private const val MORPHOLOGY_KERNEL_SIZE = 2
private const val BILATERAL_FILTER_D = 5
private const val BILATERAL_SIGMA_COLOR = 50.0
private const val BILATERAL_SIGMA_SPACE = 50.0
}
// Screen dimensions; starts out null (no assignment is visible in this part of the file).
private var screenSize : Size ? = null
// OCR timeout, initialised from DEFAULT_TIMEOUT_SECONDS.
// NOTE(review): the constant's name suggests the unit is seconds - confirm where this is read.
private var ocrTimeout : Long = DEFAULT_TIMEOUT_SECONDS
// OCR Engines - Multiple script recognizers for international Pokemon names
// One ML Kit text-recognition client per script, created when these properties are
// initialised and closed again in cleanup().
private val latinRecognizer = TextRecognition . getClient ( TextRecognizerOptions . DEFAULT_OPTIONS )
private val japaneseRecognizer = TextRecognition . getClient ( JapaneseTextRecognizerOptions . Builder ( ) . build ( ) )
private val chineseRecognizer = TextRecognition . getClient ( ChineseTextRecognizerOptions . Builder ( ) . build ( ) )
private val koreanRecognizer = TextRecognition . getClient ( KoreanTextRecognizerOptions . Builder ( ) . build ( ) )
// Dedicated dispatcher for OCR operations, backed by a fixed pool of 4 threads; closed in cleanup().
// NOTE(review): in the code visible here this dispatcher is only ever closed - confirm that
// OCR work is actually dispatched onto it somewhere.
private val ocrDispatcher = Executors . newFixedThreadPool ( 4 ) . asCoroutineDispatcher ( )
override suspend fun extractPokemonInfo ( detections : List < Detection > , screenMat : Mat ) : PokemonInfo ? {
return withContext ( Dispatchers . IO ) {
try {
PGHLog . i ( TAG , " 🔧 Using ML Kit OCR engine " )
PGHLog . i ( TAG , " 🎯 Extracting Pokemon info from ${detections.size} detections " )
// Group detections by type for easy lookup
@ -95,6 +117,12 @@ class PokemonDataExtractorImpl(
}
/**
 * Releases the OCR resources owned by this extractor: the four ML Kit recognizers
 * and the OCR dispatcher.
 *
 * Every resource is closed independently. A failure while closing one of them is
 * logged and does not prevent the remaining resources from being released.
 */
override fun cleanup() {
    val closers = listOf<Pair<String, () -> Unit>>(
        "latinRecognizer" to { latinRecognizer.close() },
        "japaneseRecognizer" to { japaneseRecognizer.close() },
        "chineseRecognizer" to { chineseRecognizer.close() },
        "koreanRecognizer" to { koreanRecognizer.close() },
        "ocrDispatcher" to { ocrDispatcher.close() },
    )
    for ((resourceName, close) in closers) {
        try {
            close()
        } catch (e: Exception) {
            // Keep going: the other recognizers and the thread pool must still be released.
            PGHLog.e(TAG, "Failed to close $resourceName during cleanup", e)
        }
    }
    PGHLog.d(TAG, " 🧹 PokemonDataExtractor cleanup completed ")
}
@ -188,26 +216,60 @@ class PokemonDataExtractorImpl(
// Extract region of interest
val roi = Mat ( screenMat , expandedBbox )
val processedRoi = preprocessImageForOCR ( roi )
val bitmap = Bitmap . createBitmap ( processedRoi . cols ( ) , processedRoi . rows ( ) , Bitmap . Config . ARGB_8888 )
try {
// Convert to bitmap for ML Kit
Utils . matToBitmap ( processedRoi , bitmap )
// Test both raw and processed ROI for numeric fields
val useRawForNumbers = key . contains ( " hp " ) || key . contains ( " attack " ) || key . contains ( " defense " ) ||
key . contains ( " spAttack " ) || key . contains ( " spDefense " ) || key . contains ( " speed " ) ||
key . contains ( " level " ) || key . contains ( " national_dex " )
// Perform OCR
performOCRWithTimeout ( bitmap , key )
val processedRoi = if ( useRawForNumbers ) {
// For numbers, try minimal processing - just ensure it's grayscale
val gray = Mat ( )
if ( roi . channels ( ) > 1 ) {
Imgproc . cvtColor ( roi , gray , Imgproc . COLOR_BGR2GRAY )
} else {
roi . copyTo ( gray )
}
gray
} else {
// For text, use specialized preprocessing based on field type
// For now, use enhanced preprocessing for all text
// TODO: Implement specialized preprocessing for types
preprocessImageForOCREnhanced ( roi )
}
val bitmap = Bitmap . createBitmap ( processedRoi . cols ( ) , processedRoi . rows ( ) , Bitmap . Config . ARGB_8888 )
try {
Utils . matToBitmap ( processedRoi , bitmap )
// Debug: Log bitmap dimensions to check for mirroring
PGHLog . d ( TAG , " 🔍 OCR bitmap for $key : ${bitmap.width} x ${bitmap.height} " )
val rawResult = performOCRWithTimeout ( bitmap , key )
// Debug: Log raw OCR result before post-processing
PGHLog . d ( TAG , " 🔍 Raw OCR result for $key : ' $rawResult ' " )
// Post-process result for better accuracy
val cleanedResult = rawResult ?. let { postProcessOCRResult ( it , key ) }
if ( cleanedResult != null && cleanedResult . isNotBlank ( ) ) {
PGHLog . i ( TAG , " ✅ OCR SUCCESS: $key = ' $cleanedResult ' " )
cleanedResult
} else {
PGHLog . w ( TAG , " ❌ OCR FAILED: $key - no text found " )
null
}
} finally {
bitmap . recycle ( )
processedRoi . release ( )
}
} finally {
// Cleanup resources
bitmap . recycle ( )
processedRoi . release ( )
roi . release ( )
} . also { extractedText ->
if ( extractedText != null ) {
PGHLog . i ( TAG , " ✅ OCR SUCCESS: $key = ' $extractedText ' " )
} else {
PGHLog . w ( TAG , " ❌ OCR FAILED: $key - no text found " )
}
}
} catch ( e : Exception ) {
@ -474,18 +536,21 @@ class PokemonDataExtractorImpl(
}
/ * *
* Preprocess image region for optimal OCR accuracy
* Enhanced preprocessing with optimal scaling and noise reduction
* /
private fun preprocessImageForOCR ( roi : Mat ) : Mat {
private fun preprocessImageForOCREnhanced ( roi : Mat ) : Mat {
return try {
// Scale up small regions
val scaledRoi = if ( roi . width ( ) < MIN_OCR_WIDTH || roi . height ( ) < MIN_OCR_HEIGHT ) {
val scaleX = maxOf ( 1.0 , MIN_OCR_WIDTH . toDouble ( ) / roi . width ( ) )
val scaleY = maxOf ( 1.0 , MIN_OCR_HEIGHT . toDouble ( ) / roi . height ( ) )
val scale = maxOf ( scaleX , scaleY )
// Optimal scaling for ML Kit (target height 64px)
val targetHeight = TARGET_OCR_HEIGHT
val scale = if ( roi . height ( ) < targetHeight ) {
targetHeight . toDouble ( ) / roi . height ( )
} else {
1.0
}
val scaledRoi = if ( scale > 1.0 ) {
val resized = Mat ( )
Imgproc . resize ( roi , resized , Size ( roi . width ( ) * scale , roi . height ( ) * scale ) )
Imgproc . resize ( roi , resized , Size ( roi . width ( ) * scale , roi . height ( ) * scale ) , 0.0 , 0.0 , Imgproc . INTER_CUBIC )
resized
} else {
val copy = Mat ( )
@ -495,66 +560,227 @@ class PokemonDataExtractorImpl(
// Convert to grayscale
val gray = Mat ( )
if ( scaledRoi . channels ( ) == 3 ) {
if ( scaledRoi . channels ( ) > 1 ) {
Imgproc . cvtColor ( scaledRoi , gray , Imgproc . COLOR_BGR2GRAY )
} else if ( scaledRoi . channels ( ) == 4 ) {
Imgproc . cvtColor ( scaledRoi , gray , Imgproc . COLOR_BGRA2GRAY )
} else {
scaledRoi . copyTo ( gray )
}
// Apply CLAHE for contrast enhancement
// Bilateral filter for noise reduction while preserving edges
val filtered = Mat ( )
Imgproc . bilateralFilter ( gray , filtered , BILATERAL_FILTER_D , BILATERAL_SIGMA_COLOR , BILATERAL_SIGMA_SPACE )
// CLAHE for adaptive contrast enhancement
val enhanced = Mat ( )
val clahe = Imgproc . createCLAHE ( CLAHE_CLIP_LIMIT , Size ( CLAHE_TILE_SIZE , CLAHE_TILE_SIZE ) )
clahe . apply ( gray , enhanced )
clahe . apply ( filtered , enhanced )
// Apply slight gaussian blur to reduce noise
val denoised = Mat ( )
Imgproc . GaussianBlur ( enhanced , denoised , Size ( GAUSSIAN_BLUR_KERNEL_SIZE , GAUSSIAN_BLUR_KERNEL_SIZE ) , GAUSSIAN_BLUR_SIGMA )
// Morphological operations to clean up text
val morphed = Mat ( )
val kernel = Imgproc . getStructuringElement ( Imgproc . MORPH_RECT , Size ( MORPHOLOGY_KERNEL_SIZE . toDouble ( ) , MORPHOLOGY_KERNEL_SIZE . toDouble ( ) ) )
Imgproc . morphologyEx ( enhanced , morphed , Imgproc . MORPH_CLOSE , kernel )
// Cleanup intermediate results
// Cleanup
scaledRoi . release ( )
gray . release ( )
gray . release ( )
filtered . release ( )
enhanced . release ( )
kernel . release ( )
denois ed
morph ed
} catch ( e : Exception ) {
PGHLog . e ( TAG , " Error preprocessing image for OCR " , e )
// Return copy of original if preprocessing fails
PGHLog . e ( TAG , " Error in enhanced OCR preprocessing " , e )
val result = Mat ( )
roi . copyTo ( result )
result
}
}
/**
 * Perform OCR with timeout using multiple ML Kit recognizers with fallback logic.
 *
 * The bitmap is wrapped in an [InputImage] with rotation 0. When [purpose] matches the
 * nickname/species check below, every script recognizer is tried and the longest
 * non-blank result is returned; for every other field only the Latin recognizer is used.
 *
 * This relies on the recognizers owned by the class; no recognizer is created per call.
 *
 * @param bitmap image region to recognise
 * @param purpose field key, used to pick the recognizer strategy and for logging
 * @return the recognised text, or null when nothing was recognised or the call did not
 *         finish within INDIVIDUAL_OCR_TIMEOUT_SECONDS
 */
private suspend fun performOCRWithTimeout(bitmap: Bitmap, purpose: String): String? {
    // withTimeoutOrNull expects milliseconds; the constant is expressed in seconds.
    return withTimeoutOrNull(INDIVIDUAL_OCR_TIMEOUT_SECONDS * 1000) {
        val image = InputImage.fromBitmap(bitmap, 0)

        // For nickname fields, try all script recognizers to handle international names
        val isNameField = purpose.contains(" nickname ", ignoreCase = true) ||
            purpose.contains(" species ", ignoreCase = true)

        if (isNameField) {
            // Return the longest non-empty result (usually most accurate)
            tryMultipleRecognizers(image, purpose)
                .filter { it.isNotBlank() }
                .maxByOrNull { it.length }
        } else {
            // For other fields (stats, types), use Latin recognizer primarily
            tryLatinRecognizer(image, purpose)
        }
    }
}
/**
 * Try multiple script recognizers and return all results for comparison.
 *
 * The Japanese, Chinese, Korean and Latin recognizers are started concurrently on the
 * same [image]. Results are collected in that order; null and blank results are dropped.
 *
 * Cancellation (for example from an enclosing timeout) is propagated to the caller
 * instead of being logged as a recognizer failure.
 *
 * @param image input to recognise
 * @param purpose field key, used for logging only
 * @return the non-blank texts produced by the recognizers; may be empty
 */
private suspend fun tryMultipleRecognizers(image: InputImage, purpose: String): List<String> = coroutineScope {
    // Try all recognizers in parallel
    val jobs = listOf(
        async { tryRecognizer(japaneseRecognizer, image, " Japanese ", purpose) },
        async { tryRecognizer(chineseRecognizer, image, " Chinese ", purpose) },
        async { tryRecognizer(koreanRecognizer, image, " Korean ", purpose) },
        async { tryRecognizer(latinRecognizer, image, " Latin ", purpose) },
    )

    // Collect all results
    val results = jobs.mapNotNull { job ->
        try {
            job.await()?.takeUnless { it.isBlank() }
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Never swallow cancellation: the caller has given up on this OCR attempt.
            throw e
        } catch (e: Exception) {
            PGHLog.w(TAG, " Script recognizer failed for $purpose : ${e.message} ")
            null
        }
    }

    PGHLog.d(TAG, " 🌍 Multi-script OCR for $purpose found ${results.size} results: $results ")
    results
}
/**
 * Runs OCR on [image] using only the Latin-script recognizer.
 *
 * @param image input to recognise
 * @param purpose field key, forwarded for logging
 * @return whatever [tryRecognizer] yields for the Latin recognizer
 */
private suspend fun tryLatinRecognizer(image: InputImage, purpose: String): String? =
    tryRecognizer(latinRecognizer, image, " Latin ", purpose)
/**
 * Generic method to try a specific recognizer.
 *
 * Bridges the ML Kit task callbacks into a suspending call. The suspension is
 * cancellable, so an enclosing timeout or cancellation stops the wait immediately;
 * the underlying ML Kit task itself is not cancelled, its late result is just ignored.
 *
 * @param recognizer expected to be an ML Kit TextRecognizer; any other value is logged
 *        as a setup error and yields null
 * @param image input to recognise
 * @param scriptName label of the script, used for logging only
 * @param purpose field key, used for logging only
 * @return the trimmed recognised text, or null when it is blank or recognition failed
 */
private suspend fun tryRecognizer(recognizer: Any, image: InputImage, scriptName: String, purpose: String): String? {
    return kotlinx.coroutines.suspendCancellableCoroutine<String?> { continuation ->
        try {
            val recognizerClient = recognizer as com.google.mlkit.vision.text.TextRecognizer
            recognizerClient.process(image)
                .addOnSuccessListener { visionText ->
                    val result = visionText.text.trim()
                    PGHLog.d(TAG, " 📱 $scriptName OCR for $purpose : ' $result ' ")
                    // The callback may arrive after the caller was cancelled or timed out.
                    if (continuation.isActive) {
                        continuation.resume(if (result.isBlank()) null else result)
                    }
                }
                .addOnFailureListener { e ->
                    PGHLog.w(TAG, " $scriptName OCR failed for $purpose : ${e.message} ")
                    if (continuation.isActive) {
                        continuation.resume(null)
                    }
                }
        } catch (e: Exception) {
            // Covers a wrong recognizer type as well as synchronous failures from process().
            PGHLog.e(TAG, " Error setting up $scriptName OCR for $purpose ", e)
            if (continuation.isActive) {
                continuation.resume(null)
            }
        }
    }
}
/**
 * Post-process OCR results with field-specific cleaning
 */
private fun postProcessOCRResult ( text : String , purpose : String ) : String {
// Cleans up raw OCR text: trims it, flattens line breaks and whitespace, then applies the
// corrections of the first `when` branch whose condition matches `purpose`.
// Returns the trimmed result; it may be empty.
// In every three-argument replace below, the trailing `true` is ignoreCase.
var cleaned = text . trim ( )
// First, handle line break issues that can fragment words
cleaned = cleaned . replace ( " \n " , " " ) . replace ( " \r " , " " ) // Remove all line breaks
cleaned = cleaned . replace ( " \\ s+ " . toRegex ( ) , " " ) // Normalize whitespace
// Branches are checked top to bottom and only the first match runs; a purpose that
// matches none of them is returned with just the normalisation above applied.
when {
purpose . contains ( " type " ) -> {
// Common OCR fixes for Pokemon types
cleaned = cleaned . replace ( " Electrlc " , " Electric " , true )
cleaned = cleaned . replace ( " Electnc " , " Electric " , true )
cleaned = cleaned . replace ( " Electr1c " , " Electric " , true )
cleaned = cleaned . replace ( " E1ectric " , " Electric " , true )
cleaned = cleaned . replace ( " 0rass " , " Grass " , true )
cleaned = cleaned . replace ( " 6rass " , " Grass " , true )
cleaned = cleaned . replace ( " Flylng " , " Flying " , true )
cleaned = cleaned . replace ( " F1ying " , " Flying " , true )
cleaned = cleaned . replace ( " Flre " , " Fire " , true )
cleaned = cleaned . replace ( " F1re " , " Fire " , true )
}
purpose . contains ( " nickname " ) || purpose . contains ( " species " ) -> {
// Common name fixes
// NOTE(review): "Quirky" looks like a nature rather than a name - confirm this fix belongs in this branch.
cleaned = cleaned . replace ( " Quitky " , " Quirky " , true )
// NOTE(review): String.replace substitutes every occurrence, not only a leading one.
cleaned = cleaned . replace ( " - " , " " , true ) // Remove dashes
cleaned = cleaned . replace ( " _ " , " " , true ) // Remove underscores
// NOTE(review): these two rewrite every matching digit, so a name that legitimately
// contains a digit would be altered as well - confirm that is acceptable.
cleaned = cleaned . replace ( " 1 " , " I " , true ) // Common 1/I confusion
cleaned = cleaned . replace ( " 0 " , " O " , true ) // Common 0/O confusion
}
purpose . contains ( " ability " ) || purpose . contains ( " nature " ) -> {
// Ability/nature specific fixes
cleaned = cleaned . replace ( " 1 " , " I " , true )
cleaned = cleaned . replace ( " 0 " , " O " , true )
}
purpose . contains ( " hp " ) || purpose . contains ( " attack " ) || purpose . contains ( " defense " ) ||
purpose . contains ( " spAttack " ) || purpose . contains ( " spDefense " ) || purpose . contains ( " speed " ) ||
purpose . contains ( " level " ) || purpose . contains ( " national_dex " ) -> {
// Numeric field fixes - map letters that OCR commonly produces in place of digits
// back to the digit.
cleaned = cleaned . replace ( " L " , " 1 " , true ) // 1 is often misread as L
// The call above already ignores case, so this lowercase variant finds nothing new.
cleaned = cleaned . replace ( " l " , " 1 " , true ) // 1 is often misread as lowercase l
cleaned = cleaned . replace ( " O " , " 0 " , true ) // 0 is often misread as O
cleaned = cleaned . replace ( " S " , " 5 " , true ) // 5 can be misread as S
// Same as with L/l: the case-insensitive call above already covered this.
cleaned = cleaned . replace ( " s " , " 5 " , true ) // 5 can be misread as lowercase s
cleaned = cleaned . replace ( " G " , " 6 " , true ) // 6 can be misread as G
cleaned = cleaned . replace ( " B " , " 8 " , true ) // 8 can be misread as B
// Remove any non-numeric characters except for spaces (which we'll handle)
cleaned = cleaned . replace ( " [^0-9 \\ s] " . toRegex ( ) , " " )
cleaned = cleaned . replace ( " \\ s+ " . toRegex ( ) , " " ) // Remove spaces from numbers
}
}
return cleaned . trim ( )
}
/**
 * Heuristic confidence score for an OCR result, clamped to the range 0.0..1.0.
 *
 * Starts from a base of 0.5 and adds a length bonus, a bonus proportional to the share
 * of letters in [text], and a field-specific bonus chosen from [purpose]; a fixed
 * penalty is subtracted when typical OCR artifacts are present.
 *
 * @param text OCR output to score
 * @param purpose field key the text was extracted for
 * @return 0.0 for blank text, otherwise the clamped score
 */
private fun calculateOCRConfidence(text: String, purpose: String): Double {
    if (text.isBlank()) return 0.0

    // Length bonus (reasonable text length)
    val lengthBonus = when {
        text.length in 3..15 -> 0.2
        text.length >= 2 -> 0.1
        else -> 0.0
    }

    // Character composition bonus
    val letterShare = text.count { it.isLetter() }.toDouble() / text.length

    // Field-specific bonus: only the first matching rule applies
    val fieldBonus = when {
        purpose.contains(" type ") && isValidPokemonType(text) -> 0.3
        purpose.contains(" level ") && text.all { it.isDigit() } -> 0.3
        purpose.contains(" nickname ") && text.length >= 3 -> 0.1
        else -> 0.0
    }

    // Penalty for common OCR artifacts; the national_dex exemption covers the dash only
    val dashCountsAsArtifact = text.contains(" - ") && !purpose.contains(" national_dex ")
    val hasArtifact = text.contains(" _ ") || text.contains(" | ") || dashCountsAsArtifact
    val artifactPenalty = if (hasArtifact) 0.2 else 0.0

    val score = 0.5 + lengthBonus + letterShare * 0.2 + fieldBonus - artifactPenalty
    return score.coerceIn(0.0, 1.0)
}
/**
 * Reports whether [text] equals one of the eighteen known Pokemon type names,
 * compared without regard to letter case.
 *
 * @param text candidate type name
 * @return true when a known type name matches, false otherwise
 */
private fun isValidPokemonType(text: String): Boolean {
    val knownTypes = setOf(
        " Normal ", " Fire ", " Water ", " Electric ", " Grass ", " Ice ", " Fighting ", " Poison ",
        " Ground ", " Flying ", " Psychic ", " Bug ", " Rock ", " Ghost ", " Dragon ", " Dark ",
        " Steel ", " Fairy "
    )
    for (typeName in knownTypes) {
        if (typeName.equals(text, ignoreCase = true)) {
            return true
        }
    }
    return false
}
/**
 * Build final PokemonInfo from extracted data
 */