From 7594fb155200e1702876b96f9a33e53a59cc7a25 Mon Sep 17 00:00:00 2001
From: Quildra <Quildra@gmail.com>
Date: Sun, 3 Aug 2025 06:56:30 +0100
Subject: [PATCH] feat: add Japanese/Chinese/Korean OCR support for
 international Pokemon names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added ML Kit script-specific recognizers for Japanese, Chinese, Korean
- Implemented multi-script OCR with intelligent fallback logic
- Uses parallel recognition for nickname/species fields only
- Maintains performance by using Latin recognizer for stats/types
- Successfully detects Japanese Pokemon names like "二ン フィイア"
- Removed Tesseract dependencies and cleaned up unused code

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 app/build.gradle                              |   6 +
 .../data/PokemonDataExtractorImpl.kt          | 348 +++++++++++++++---
 2 files changed, 293 insertions(+), 61 deletions(-)

diff --git a/app/build.gradle b/app/build.gradle
index 1a215ed..6196324 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -69,6 +69,12 @@ dependencies {
     // ML Kit for text recognition (OCR)
     implementation 'com.google.mlkit:text-recognition:16.0.0'
     
+    // ML Kit script-specific recognizers for international Pokemon names
+    implementation 'com.google.android.gms:play-services-mlkit-text-recognition-japanese:16.0.1'
+    implementation 'com.google.android.gms:play-services-mlkit-text-recognition-chinese:16.0.1'
+    implementation 'com.google.android.gms:play-services-mlkit-text-recognition-korean:16.0.1'
+    
+    
     // TensorFlow Lite
     implementation 'org.tensorflow:tensorflow-lite:2.13.0'
     implementation 'org.tensorflow:tensorflow-lite-support:0.4.4'
diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/data/PokemonDataExtractorImpl.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/data/PokemonDataExtractorImpl.kt
index 111c512..cbba8d9 100644
--- a/app/src/main/java/com/quillstudios/pokegoalshelper/data/PokemonDataExtractorImpl.kt
+++ b/app/src/main/java/com/quillstudios/pokegoalshelper/data/PokemonDataExtractorImpl.kt
@@ -7,6 +7,9 @@ import android.util.Size
 import com.google.mlkit.vision.common.InputImage
 import com.google.mlkit.vision.text.TextRecognition
 import com.google.mlkit.vision.text.latin.TextRecognizerOptions
+import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions
+import com.google.mlkit.vision.text.japanese.JapaneseTextRecognizerOptions
+import com.google.mlkit.vision.text.korean.KoreanTextRecognizerOptions
 import com.quillstudios.pokegoalshelper.ml.Detection
 import com.quillstudios.pokegoalshelper.PokemonInfo
 import com.quillstudios.pokegoalshelper.PokemonStats
@@ -34,26 +37,45 @@ class PokemonDataExtractorImpl(
         // OCR Configuration
         private const val DEFAULT_TIMEOUT_SECONDS = 10L
         private const val INDIVIDUAL_OCR_TIMEOUT_SECONDS = 5L
-        private const val BBOX_EXPANSION_FACTOR = 0.05f // 5% expansion for better OCR
+        private const val BBOX_EXPANSION_FACTOR = 0.1f // Increased to 10% for better text capture
         
         // OCR Image Processing Constants
-        private const val MIN_OCR_WIDTH = 50
-        private const val MIN_OCR_HEIGHT = 50
-        private const val GAUSSIAN_BLUR_KERNEL_SIZE = 3.0
-        private const val GAUSSIAN_BLUR_SIGMA = 0.5
-        private const val CLAHE_CLIP_LIMIT = 1.5
-        private const val CLAHE_TILE_SIZE = 8.0
+        private const val MIN_OCR_WIDTH = 64
+        private const val MIN_OCR_HEIGHT = 32
+        private const val TARGET_OCR_HEIGHT = 64 // Optimal height for ML Kit
+        
+        // Multi-scale processing
+        private const val SCALE_FACTOR_1 = 2.0 // 2x upscale
+        private const val SCALE_FACTOR_2 = 3.0 // 3x upscale for small text
+        
+        // Contrast enhancement
+        private const val CLAHE_CLIP_LIMIT = 2.0 // Increased for better contrast
+        private const val CLAHE_TILE_SIZE = 4.0 // Smaller tiles for finer control
+        
+        // Noise reduction
+        private const val MORPHOLOGY_KERNEL_SIZE = 2
+        private const val BILATERAL_FILTER_D = 5
+        private const val BILATERAL_SIGMA_COLOR = 50.0
+        private const val BILATERAL_SIGMA_SPACE = 50.0
     }
     
     private var screenSize: Size? = null
     private var ocrTimeout: Long = DEFAULT_TIMEOUT_SECONDS
     
+    // OCR Engines - Multiple script recognizers for international Pokemon names
+    private val latinRecognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
+    private val japaneseRecognizer = TextRecognition.getClient(JapaneseTextRecognizerOptions.Builder().build())
+    private val chineseRecognizer = TextRecognition.getClient(ChineseTextRecognizerOptions.Builder().build())
+    private val koreanRecognizer = TextRecognition.getClient(KoreanTextRecognizerOptions.Builder().build())
+    
     // Dedicated dispatcher for OCR operations
     private val ocrDispatcher = Executors.newFixedThreadPool(4).asCoroutineDispatcher()
     
     override suspend fun extractPokemonInfo(detections: List<Detection>, screenMat: Mat): PokemonInfo? {
         return withContext(Dispatchers.IO) {
             try {
+                PGHLog.i(TAG, "🔧 Using ML Kit OCR engine")
+                
                 PGHLog.i(TAG, "🎯 Extracting Pokemon info from ${detections.size} detections")
                 
                 // Group detections by type for easy lookup
@@ -95,6 +117,12 @@ class PokemonDataExtractorImpl(
     }
     
     override fun cleanup() {
+        // Close all recognizers
+        latinRecognizer.close()
+        japaneseRecognizer.close()
+        chineseRecognizer.close()
+        koreanRecognizer.close()
+        
         ocrDispatcher.close()
         PGHLog.d(TAG, "🧹 PokemonDataExtractor cleanup completed")
     }
@@ -188,26 +216,60 @@ class PokemonDataExtractorImpl(
             
             // Extract region of interest
             val roi = Mat(screenMat, expandedBbox)
-            val processedRoi = preprocessImageForOCR(roi)
-            val bitmap = Bitmap.createBitmap(processedRoi.cols(), processedRoi.rows(), Bitmap.Config.ARGB_8888)
             
             try {
-                // Convert to bitmap for ML Kit
-                Utils.matToBitmap(processedRoi, bitmap)
+                // Test both raw and processed ROI for numeric fields
+                val useRawForNumbers = key.contains("hp") || key.contains("attack") || key.contains("defense") || 
+                                     key.contains("spAttack") || key.contains("spDefense") || key.contains("speed") ||
+                                     key.contains("level") || key.contains("national_dex")
                 
-                // Perform OCR
-                performOCRWithTimeout(bitmap, key)
+                val processedRoi = if (useRawForNumbers) {
+                    // For numbers, try minimal processing - just ensure it's grayscale
+                    val gray = Mat()
+                    if (roi.channels() > 1) {
+                        Imgproc.cvtColor(roi, gray, Imgproc.COLOR_BGR2GRAY)
+                    } else {
+                        roi.copyTo(gray)
+                    }
+                    gray
+                } else {
+                    // For text, use specialized preprocessing based on field type
+                    // For now, use enhanced preprocessing for all text
+                    // TODO: Implement specialized preprocessing for types
+                    preprocessImageForOCREnhanced(roi)
+                }
+                
+                val bitmap = Bitmap.createBitmap(processedRoi.cols(), processedRoi.rows(), Bitmap.Config.ARGB_8888)
+                
+                try {
+                    Utils.matToBitmap(processedRoi, bitmap)
+                    
+                    // Debug: Log bitmap dimensions to check for mirroring
+                    PGHLog.d(TAG, "🔍 OCR bitmap for $key: ${bitmap.width}x${bitmap.height}")
+                    
+                    
+                    val rawResult = performOCRWithTimeout(bitmap, key)
+                    
+                    // Debug: Log raw OCR result before post-processing
+                    PGHLog.d(TAG, "🔍 Raw OCR result for $key: '$rawResult'")
+                    
+                    // Post-process result for better accuracy
+                    val cleanedResult = rawResult?.let { postProcessOCRResult(it, key) }
+                    
+                    if (cleanedResult != null && cleanedResult.isNotBlank()) {
+                        PGHLog.i(TAG, "✅ OCR SUCCESS: $key = '$cleanedResult'")
+                        cleanedResult
+                    } else {
+                        PGHLog.w(TAG, "❌ OCR FAILED: $key - no text found")
+                        null
+                    }
+                } finally {
+                    bitmap.recycle()
+                    processedRoi.release()
+                }
             } finally {
                 // Cleanup resources
-                bitmap.recycle()
-                processedRoi.release()
                 roi.release()
-            }.also { extractedText ->
-                if (extractedText != null) {
-                    PGHLog.i(TAG, "✅ OCR SUCCESS: $key = '$extractedText'")
-                } else {
-                    PGHLog.w(TAG, "❌ OCR FAILED: $key - no text found")
-                }
             }
             
         } catch (e: Exception) {
@@ -474,18 +536,21 @@ class PokemonDataExtractorImpl(
     }
     
     /**
-     * Preprocess image region for optimal OCR accuracy
+     * Enhanced preprocessing with optimal scaling and noise reduction
      */
-    private fun preprocessImageForOCR(roi: Mat): Mat {
+    private fun preprocessImageForOCREnhanced(roi: Mat): Mat {
         return try {
-            // Scale up small regions
-            val scaledRoi = if (roi.width() < MIN_OCR_WIDTH || roi.height() < MIN_OCR_HEIGHT) {
-                val scaleX = maxOf(1.0, MIN_OCR_WIDTH.toDouble() / roi.width())
-                val scaleY = maxOf(1.0, MIN_OCR_HEIGHT.toDouble() / roi.height())
-                val scale = maxOf(scaleX, scaleY)
-                
+            // Optimal scaling for ML Kit (target height 64px)
+            val targetHeight = TARGET_OCR_HEIGHT
+            val scale = if (roi.height() < targetHeight) {
+                targetHeight.toDouble() / roi.height()
+            } else {
+                1.0
+            }
+            
+            val scaledRoi = if (scale > 1.0) {
                 val resized = Mat()
-                Imgproc.resize(roi, resized, Size(roi.width() * scale, roi.height() * scale))
+                Imgproc.resize(roi, resized, Size(roi.width() * scale, roi.height() * scale), 0.0, 0.0, Imgproc.INTER_CUBIC)
                 resized
             } else {
                 val copy = Mat()
@@ -495,66 +560,227 @@ class PokemonDataExtractorImpl(
             
             // Convert to grayscale
             val gray = Mat()
-            if (scaledRoi.channels() == 3) {
+            if (scaledRoi.channels() > 1) {
                 Imgproc.cvtColor(scaledRoi, gray, Imgproc.COLOR_BGR2GRAY)
-            } else if (scaledRoi.channels() == 4) {
-                Imgproc.cvtColor(scaledRoi, gray, Imgproc.COLOR_BGRA2GRAY)
             } else {
                 scaledRoi.copyTo(gray)
             }
             
-            // Apply CLAHE for contrast enhancement
+            // Bilateral filter for noise reduction while preserving edges
+            val filtered = Mat()
+            Imgproc.bilateralFilter(gray, filtered, BILATERAL_FILTER_D, BILATERAL_SIGMA_COLOR, BILATERAL_SIGMA_SPACE)
+            
+            // CLAHE for adaptive contrast enhancement
             val enhanced = Mat()
             val clahe = Imgproc.createCLAHE(CLAHE_CLIP_LIMIT, Size(CLAHE_TILE_SIZE, CLAHE_TILE_SIZE))
-            clahe.apply(gray, enhanced)
+            clahe.apply(filtered, enhanced)
             
-            // Apply slight gaussian blur to reduce noise
-            val denoised = Mat()
-            Imgproc.GaussianBlur(enhanced, denoised, Size(GAUSSIAN_BLUR_KERNEL_SIZE, GAUSSIAN_BLUR_KERNEL_SIZE), GAUSSIAN_BLUR_SIGMA)
+            // Morphological operations to clean up text
+            val morphed = Mat()
+            val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, Size(MORPHOLOGY_KERNEL_SIZE.toDouble(), MORPHOLOGY_KERNEL_SIZE.toDouble()))
+            Imgproc.morphologyEx(enhanced, morphed, Imgproc.MORPH_CLOSE, kernel)
             
-            // Cleanup intermediate results
+            // Cleanup
             scaledRoi.release()
-            gray.release()  
+            gray.release()
+            filtered.release()
             enhanced.release()
+            kernel.release()
             
-            denoised
+            morphed
             
         } catch (e: Exception) {
-            PGHLog.e(TAG, "Error preprocessing image for OCR", e)
-            // Return copy of original if preprocessing fails
+            PGHLog.e(TAG, "Error in enhanced OCR preprocessing", e)
             val result = Mat()
             roi.copyTo(result)
             result
         }
     }
     
+    
     /**
-     * Perform OCR with timeout using ML Kit
+     * Perform OCR with timeout using multiple ML Kit recognizers with fallback logic
      */
     private suspend fun performOCRWithTimeout(bitmap: Bitmap, purpose: String): String? {
         return withTimeoutOrNull(INDIVIDUAL_OCR_TIMEOUT_SECONDS * 1000) {
-            suspendCoroutine<String?> { continuation ->
-                try {
-                    val image = InputImage.fromBitmap(bitmap, 0)
-                    val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
-                    
-                    recognizer.process(image)
-                        .addOnSuccessListener { visionText ->
-                            val result = visionText.text.trim()
-                            continuation.resume(if (result.isBlank()) null else result)
-                        }
-                        .addOnFailureListener { e ->
-                            PGHLog.e(TAG, "OCR failed for $purpose: ${e.message}")
-                            continuation.resume(null)
-                        }
-                } catch (e: Exception) {
-                    PGHLog.e(TAG, "Error setting up OCR for $purpose", e)
-                    continuation.resume(null)
+            val image = InputImage.fromBitmap(bitmap, 0)
+            
+            // For nickname fields, try all script recognizers to handle international names
+            if (purpose.contains("nickname", ignoreCase = true) || purpose.contains("species", ignoreCase = true)) {
+                val results = tryMultipleRecognizers(image, purpose)
+                // Return the longest non-empty result (usually most accurate)
+                results.filter { it.isNotBlank() }.maxByOrNull { it.length }
+            } else {
+                // For other fields (stats, types), use Latin recognizer primarily
+                tryLatinRecognizer(image, purpose)
+            }
+        }
+    }
+    
+    /**
+     * Try multiple script recognizers and return all results for comparison
+     */
+    private suspend fun tryMultipleRecognizers(image: InputImage, purpose: String): List<String> = coroutineScope {
+        val results = mutableListOf<String>()
+        
+        // Try all recognizers in parallel
+        val jobs = listOf(
+            async { tryRecognizer(japaneseRecognizer, image, "Japanese", purpose) },
+            async { tryRecognizer(chineseRecognizer, image, "Chinese", purpose) },
+            async { tryRecognizer(koreanRecognizer, image, "Korean", purpose) },
+            async { tryRecognizer(latinRecognizer, image, "Latin", purpose) }
+        )
+        
+        // Collect all results
+        jobs.forEach { job ->
+            try {
+                val result = job.await()
+                if (!result.isNullOrBlank()) {
+                    results.add(result)
                 }
+            } catch (e: Exception) {
+                PGHLog.w(TAG, "Script recognizer failed for $purpose: ${e.message}")
+            }
+        }
+        
+        PGHLog.d(TAG, "🌍 Multi-script OCR for $purpose found ${results.size} results: $results")
+        results
+    }
+    
+    /**
+     * Try Latin recognizer specifically
+     */
+    private suspend fun tryLatinRecognizer(image: InputImage, purpose: String): String? {
+        return tryRecognizer(latinRecognizer, image, "Latin", purpose)
+    }
+    
+    /**
+     * Generic method to try a specific recognizer
+     */
+    private suspend fun tryRecognizer(recognizer: Any, image: InputImage, scriptName: String, purpose: String): String? {
+        return suspendCoroutine { continuation ->
+            try {
+                val recognizerClient = recognizer as com.google.mlkit.vision.text.TextRecognizer
+                
+                recognizerClient.process(image)
+                    .addOnSuccessListener { visionText ->
+                        val result = visionText.text.trim()
+                        PGHLog.d(TAG, "📱 $scriptName OCR for $purpose: '$result'")
+                        continuation.resume(if (result.isBlank()) null else result)
+                    }
+                    .addOnFailureListener { e ->
+                        PGHLog.w(TAG, "$scriptName OCR failed for $purpose: ${e.message}")
+                        continuation.resume(null)
+                    }
+            } catch (e: Exception) {
+                PGHLog.e(TAG, "Error setting up $scriptName OCR for $purpose", e)
+                continuation.resume(null)
             }
         }
     }
     
+    /**
+     * Post-process OCR results with field-specific cleaning
+     */
+    private fun postProcessOCRResult(text: String, purpose: String): String {
+        var cleaned = text.trim()
+        
+        // First, handle line break issues that can fragment words
+        cleaned = cleaned.replace("\n", "").replace("\r", "") // Remove all line breaks
+        cleaned = cleaned.replace("\\s+".toRegex(), " ") // Normalize whitespace
+        
+        when {
+            purpose.contains("type") -> {
+                // Common OCR fixes for Pokemon types
+                cleaned = cleaned.replace("Electrlc", "Electric", true)
+                cleaned = cleaned.replace("Electnc", "Electric", true)
+                cleaned = cleaned.replace("Electr1c", "Electric", true)
+                cleaned = cleaned.replace("E1ectric", "Electric", true)
+                cleaned = cleaned.replace("0rass", "Grass", true)
+                cleaned = cleaned.replace("6rass", "Grass", true)
+                cleaned = cleaned.replace("Flylng", "Flying", true)
+                cleaned = cleaned.replace("F1ying", "Flying", true)
+                cleaned = cleaned.replace("Flre", "Fire", true)
+                cleaned = cleaned.replace("F1re", "Fire", true)
+            }
+            purpose.contains("nickname") || purpose.contains("species") -> {
+                // Common name fixes
+                cleaned = cleaned.replace("Quitky", "Quirky", true)
+                cleaned = cleaned.replace("- ", "", true) // Remove leading dashes
+                cleaned = cleaned.replace("_", "", true) // Remove underscores
+                cleaned = cleaned.replace("1", "I", true) // Common 1/I confusion
+                cleaned = cleaned.replace("0", "O", true) // Common 0/O confusion
+            }
+            purpose.contains("ability") || purpose.contains("nature") -> {
+                // Ability/nature specific fixes
+                cleaned = cleaned.replace("1", "I", true)
+                cleaned = cleaned.replace("0", "O", true)
+            }
+            purpose.contains("hp") || purpose.contains("attack") || purpose.contains("defense") || 
+            purpose.contains("spAttack") || purpose.contains("spDefense") || purpose.contains("speed") ||
+            purpose.contains("level") || purpose.contains("national_dex") -> {
+                // Numeric field fixes - common digit OCR errors
+                cleaned = cleaned.replace("L", "1", true) // L often misread as 1
+                cleaned = cleaned.replace("l", "1", true) // lowercase l often misread as 1
+                cleaned = cleaned.replace("O", "0", true) // O often misread as 0
+                cleaned = cleaned.replace("S", "5", true) // S can be misread as 5
+                cleaned = cleaned.replace("s", "5", true) // lowercase s can be misread as 5
+                cleaned = cleaned.replace("G", "6", true) // G can be misread as 6
+                cleaned = cleaned.replace("B", "8", true) // B can be misread as 8
+                // Remove any non-numeric characters except for spaces (which we'll handle)
+                cleaned = cleaned.replace("[^0-9\\s]".toRegex(), "")
+                cleaned = cleaned.replace("\\s+".toRegex(), "") // Remove spaces from numbers
+            }
+        }
+        
+        return cleaned.trim()
+    }
+    
+    /**
+     * Calculate confidence score for OCR result
+     */
+    private fun calculateOCRConfidence(text: String, purpose: String): Double {
+        if (text.isBlank()) return 0.0
+        
+        var confidence = 0.5 // Base confidence
+        
+        // Length bonus (reasonable text length)
+        when {
+            text.length >= 3 && text.length <= 15 -> confidence += 0.2
+            text.length >= 2 -> confidence += 0.1
+        }
+        
+        // Character composition bonus
+        val alphaRatio = text.count { it.isLetter() }.toDouble() / text.length
+        confidence += alphaRatio * 0.2
+        
+        // Field-specific bonuses
+        when {
+            purpose.contains("type") && isValidPokemonType(text) -> confidence += 0.3
+            purpose.contains("level") && text.all { it.isDigit() } -> confidence += 0.3
+            purpose.contains("nickname") && text.length >= 3 -> confidence += 0.1
+        }
+        
+        // Penalty for common OCR artifacts
+        if (text.contains("_") || text.contains("|") || text.contains("-") && !purpose.contains("national_dex")) {
+            confidence -= 0.2
+        }
+        
+        return confidence.coerceIn(0.0, 1.0)
+    }
+    
+    /**
+     * Check if text is a valid Pokemon type
+     */
+    private fun isValidPokemonType(text: String): Boolean {
+        val validTypes = setOf(
+            "Normal", "Fire", "Water", "Electric", "Grass", "Ice", "Fighting", "Poison",
+            "Ground", "Flying", "Psychic", "Bug", "Rock", "Ghost", "Dragon", "Dark",
+            "Steel", "Fairy"
+        )
+        return validTypes.any { it.equals(text, ignoreCase = true) }
+    }
+    
     /**
      * Build final PokemonInfo from extracted data
      */