diff --git a/.gitignore b/.gitignore
index e434733..e10660a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -113,4 +113,10 @@ fastlane/readme.md
 
 # Android Profiling
 *.hprof
+model_export_env/
+
+# Debug tools and temporary files
+tools/debug_scripts/debug_env/
+raw_models/exports/
+package-lock.json
 venv*/
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..146effe
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,200 @@
+# Claude Development Guidelines
+
+This document establishes development patterns and workflows for maintaining clean, trackable progress on the PokeGoalsHelper2 project.
+
+## Git Workflow
+
+### Branch Naming Convention
+Use descriptive branch names following this pattern:
+- `feature/description-of-feature`
+- `bugfix/description-of-bug`
+- `refactor/description-of-refactor`
+- `docs/description-of-documentation`
+
+Examples:
+- `feature/floating-orb-ui`
+- `bugfix/image-buffer-leak`
+- `refactor/mvc-architecture`
+
+### Commit Guidelines
+
+#### Commit Early and Often
+- Commit after each logical change or completion of a small task
+- Don't wait to complete entire features before committing
+- Use the TodoWrite tool to track progress and commit when todos are completed
+
+#### Commit Message Format
+```
+<type>: <short description>
+
+<body explaining the change>
+
+Related todos: #<todo-id>, #<todo-id>
+```
+
+Types:
+- `feat`: New feature
+- `fix`: Bug fix
+- `refactor`: Code refactoring
+- `docs`: Documentation
+- `test`: Testing
+- `style`: Code formatting/style
+
+Examples:
+```
+feat: add floating orb UI with expandable menu
+
+Implemented CalcIV-style floating orb that expands to show
+detection and filter options. Replaces old button grid layout.
+
+Related todos: #22
+```
+
+```
+fix: resolve ImageReader buffer leak causing logcat spam
+
+- Increased buffer count from 2 to 3
+- Added proper image cleanup in triggerManualDetection()
+- Ensures images are closed after processing
+
+Related todos: #buffer-fix
+```
+
+### Branch Management
+
+1. **Create branch for each task/feature**
+   ```bash
+   git checkout -b feature/<feature-name>
+   ```
+
+2. **Commit frequently during development**
+   ```bash
+   git add .
+   git commit -m "feat: implement basic orb button structure"
+   ```
+
+3. **Push branch and create PR when complete**
+   ```bash
+   git push origin feature/<feature-name>
+   ```
+
+4. **Merge to main and delete feature branch**
+   ```bash
+   git checkout main
+   git merge feature/<feature-name>
+   git branch -d feature/<feature-name>
+   ```
+
+## Architecture Guidelines
+
+### Separation of Concerns
+
+#### UI Layer (Views)
+- Handle only UI interactions and display
+- No business logic or direct data manipulation
+- Communicate via events/callbacks
+
+#### Business Logic Layer (Controllers/Services)
+- Handle core application logic
+- Process data and make decisions
+- Independent of UI implementation
+
+#### Data Layer (Models)
+- Manage data structures and persistence
+- Handle API calls and data transformations
+- Pure data operations
+
+### Event-Driven Communication
+
+Use callbacks and event buses to decouple UI from business logic:
+
+```kotlin
+// UI publishes events
+interface DetectionUIEvents {
+    fun onDetectionRequested()
+    fun onClassFilterChanged(className: String?)
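+    // (a null className clears the filter so all classes are shown again;
+    // see DetectionController.onClassFilterChanged)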
+ fun onDebugModeToggled() +} + +// Business logic handles events +class DetectionController : DetectionUIEvents { + override fun onDetectionRequested() { + // Handle detection logic + // Notify UI via callbacks + } +} +``` + +### File Organization + +``` +app/src/main/java/com/quillstudios/pokegoalshelper/ +├── ui/ +│ ├── FloatingOrbUI.kt # UI components +│ ├── DetectionOverlay.kt # Visual overlays +│ └── interfaces/ +│ └── DetectionUIEvents.kt # UI event interfaces +├── controllers/ +│ ├── DetectionController.kt # Business logic +│ └── ScreenCaptureController.kt # Screen capture logic +├── models/ +│ ├── Detection.kt # Data models +│ ├── PokemonInfo.kt # Domain models +│ └── DetectionSettings.kt # Configuration +└── services/ + ├── YOLOOnnxDetector.kt # YOLO inference + └── ScreenCaptureService.kt # Android service +``` + +## Development Best Practices + +### Testing Approach +- Test business logic separately from UI +- Mock UI interactions for controller tests +- Use dependency injection for testability + +### Code Quality +- Single responsibility principle +- Minimize coupling between layers +- Use interfaces for dependency injection +- Keep functions small and focused + +### Documentation +- Update this file when patterns change +- Document complex business logic +- Use clear variable and function names +- Add TODO comments for future improvements + +## Current Architecture Status + +### Phase 1: Current State (Coupled) +- UI and business logic mixed in ScreenCaptureService +- Direct calls between UI and YOLO detector +- Monolithic service class + +### Phase 2: Target State (Decoupled) +- Separate UI components with event interfaces +- Controller layer handling business logic +- Clean dependency injection +- Testable, maintainable architecture + +## Commit Reminders + +Before each commit, ensure: +- [ ] Code follows separation of concerns +- [ ] No business logic in UI classes +- [ ] Interfaces used for layer communication +- [ ] Todo list updated with progress +- [ ] Commit message follows format guidelines +- [ ] Branch name is descriptive + +## Claude Instructions + +When working on this project: +1. Always create a new branch for each task +2. Commit frequently with descriptive messages +3. Use TodoWrite tool to track progress +4. Follow MVC/event-driven patterns +5. Separate UI logic from business logic +6. Test changes incrementally +7. Update documentation when architecture changes \ No newline at end of file diff --git a/SHINY_ICON_DEBUG_REPORT.md b/SHINY_ICON_DEBUG_REPORT.md new file mode 100644 index 0000000..dc97a1a --- /dev/null +++ b/SHINY_ICON_DEBUG_REPORT.md @@ -0,0 +1,107 @@ +# Shiny Icon Detection Issue - Debug Report + +## Problem Summary +The ONNX model with NMS=True is not detecting shiny icons, while the original .pt model detects them at 0.97 confidence. This investigation aimed to identify why the ONNX model fails to detect shiny icons (class 50). + +## Root Cause Identified ✅ +**The built-in NMS in the ONNX model is filtering out shiny icon detections because they don't make it into the top 300 highest-confidence detections.** + +## Investigation Process + +### 1. Initial Debugging Setup +- Added `DEBUG_SHINY_DETECTION = true` flag to enable detailed logging +- Lowered confidence threshold from 0.45f to 0.25f temporarily +- Added special debug logging for shiny_icon (class 50) candidates + +### 2. 
Raw Model Output Analysis +**Key Discovery**: The ONNX model output format is `1 x 300 x 6` instead of the expected `8400 x 99`: +- **Expected** (raw model): 8400 detections × (4 coords + 95 classes) = 831,600 values +- **Actual** (NMS model): 300 final detections × (4 coords + 1 confidence + 1 class_id) = 1,800 values + +This confirmed the model has built-in NMS that only returns the top 300 detections. + +### 3. Class Detection Analysis +**Test Results from logs**: +``` +🔬 [NMS CLASSES] Detected classes: [29, 32, 33, 47, 48, 62, 63, 64, 67, 68, 69, 70, 71, 72] +❌ [NO SHINY] Shiny icon (class 50) not found in NMS output +``` + +The model consistently detects other classes but **class 50 (shiny_icon) never appears** in the NMS output across all preprocessing methods (ultralytics, enhanced, sharpened, original). + +### 4. Model Performance Comparison +- **.pt model**: Detects shiny icon at **0.97 confidence** +- **ONNX model**: Shiny icon completely absent from top 300 NMS results +- **Other detection classes**: Working fine in ONNX (20-22 detections per method) + +## Technical Details + +### Debug Infrastructure Added +1. **Raw output inspection**: Logs tensor dimensions and output statistics +2. **Class detection tracking**: Shows all detected classes in NMS output +3. **Low-confidence checking**: Searches for any class 50 predictions regardless of confidence +4. **Multi-method analysis**: Tests across all preprocessing methods + +### Code Changes Made +- `YOLOOnnxDetector.kt`: Added comprehensive debugging in `detectWithPreprocessing()` +- Debug flags: `DEBUG_SHINY_DETECTION`, lowered `CONFIDENCE_THRESHOLD` +- Enhanced logging for NMS output format analysis + +## Why NMS=True Was Chosen +The NMS=True version provides "drastically better results" for general detection, so reverting to NMS=False isn't ideal. + +## Proposed Solutions + +### Option 1: Hybrid Model Approach (Recommended) +1. **Primary model**: Keep NMS=True for general detection performance +2. **Fallback model**: Add NMS=False model specifically for rare classes like shiny icons +3. **Detection strategy**: Run general detection first, then targeted detection for missing rare classes + +### Option 2: NMS Parameter Tuning +Re-export ONNX model with modified NMS parameters: +```python +model.export(format='onnx', nms=True, max_det=500, conf=0.1) # Increase max detections, lower confidence +``` + +### Option 3: Post-Processing Enhancement +- Export NMS=False model temporarily to verify shiny detection capability +- Implement custom NMS that preserves rare class detections +- Use class-aware confidence thresholds + +### Option 4: Model Re-training Consideration +If shiny icons are consistently low-confidence, consider: +- Augmenting training data with more shiny examples +- Adjusting class weights during training +- Using focal loss for rare classes + +## Next Steps (Prioritized) + +### Immediate (Next Session) +1. **Test NMS=False export** to confirm model can detect shiny icons in raw output +2. **Document baseline performance** comparison between NMS=True vs NMS=False +3. **Verify class mapping** is correct in ONNX conversion + +### Short Term +1. **Implement hybrid approach** with both models if NMS=False confirms shiny detection +2. **Optimize detection pipeline** to minimize performance impact +3. **Add class-specific confidence thresholds** + +### Long Term +1. **Model optimization**: Fine-tune NMS parameters during export +2. **Training improvements**: Address rare class detection in model training +3. 
**Performance monitoring**: Track detection rates for all rare classes + +## Files Modified +- `YOLOOnnxDetector.kt`: Added debugging infrastructure +- Branch: `feature/debug-shiny-pokeball-detection` +- Commits: Multiple debugging iterations with detailed logging + +## Test Environment +- Device: Android device with ONNX Runtime +- Test image: Contains shiny icon detectable at 0.97 confidence by .pt model +- ONNX model: `best.onnx` with NMS=True, 95 classes, 300 max detections + +## Conclusion +The investigation successfully identified that the ONNX model with built-in NMS is capable of detecting objects effectively, but the aggressive NMS filtering (top 300 only) is preventing shiny icon detections from appearing in the final output. The model architecture and class mapping appear correct, as other classes are detected properly. + +The solution requires either adjusting the NMS parameters during model export or implementing a hybrid detection approach to preserve rare class detections while maintaining the superior general performance of the NMS=True model. \ No newline at end of file diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt index efaade0..abb3f99 100644 --- a/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt @@ -21,13 +21,17 @@ import android.view.Gravity import android.widget.Button import android.widget.LinearLayout import androidx.core.app.NotificationCompat +import com.quillstudios.pokegoalshelper.controllers.DetectionController +import com.quillstudios.pokegoalshelper.ui.FloatingOrbUI import org.opencv.android.Utils import org.opencv.core.* import org.opencv.imgproc.Imgproc +import org.opencv.imgcodecs.Imgcodecs import com.google.mlkit.vision.common.InputImage import com.google.mlkit.vision.text.TextRecognition import com.google.mlkit.vision.text.latin.TextRecognizerOptions import java.util.concurrent.CountDownLatch +import java.io.File import java.util.concurrent.TimeUnit import java.util.concurrent.Executors import java.util.concurrent.ThreadPoolExecutor @@ -98,9 +102,9 @@ class ScreenCaptureService : Service() { private var screenDensity = 0 private var detectionOverlay: DetectionOverlay? = null - // Floating button overlay - private var overlayButton: View? = null - private var windowManager: WindowManager? = null + // MVC Components + private lateinit var detectionController: DetectionController + private var floatingOrbUI: FloatingOrbUI? = null private val handler = Handler(Looper.getMainLooper()) private var captureInterval = 2000L // Capture every 2 seconds @@ -148,6 +152,14 @@ class ScreenCaptureService : Service() { } else { Log.i(TAG, "✅ ONNX YOLO detector initialized for screen capture") } + + // Initialize MVC components + detectionController = DetectionController(yoloDetector!!) + floatingOrbUI = FloatingOrbUI(this, detectionController) + detectionController.setUICallbacks(floatingOrbUI!!) 
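+        // The controller has no access to MediaProjection, so the service
+        // injects the actual capture-and-detect step as a callback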
+ detectionController.setDetectionRequestCallback { triggerManualDetection() } + + Log.d(TAG, "✅ MVC architecture initialized") } override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { @@ -268,9 +280,9 @@ class ScreenCaptureService : Service() { return } - Log.d(TAG, "Screen capture setup complete, creating manual trigger button") - // Create floating detection button instead of auto-capture - createFloatingButton() + Log.d(TAG, "Screen capture setup complete, showing floating orb UI") + // Show the floating orb UI + floatingOrbUI?.show() } catch (e: Exception) { Log.e(TAG, "Error starting screen capture", e) @@ -283,7 +295,7 @@ class ScreenCaptureService : Service() { handler.removeCallbacks(captureRunnable) hideDetectionOverlay() - removeFloatingButton() + floatingOrbUI?.hide() latestImage?.close() latestImage = null virtualDisplay?.release() @@ -334,6 +346,9 @@ class ScreenCaptureService : Service() { val rowStride = planes[0].rowStride val rowPadding = rowStride - pixelStride * screenWidth + Log.d(TAG, "🖼️ CAPTURE DEBUG: pixelStride=$pixelStride, rowStride=$rowStride, rowPadding=$rowPadding") + Log.d(TAG, "🖼️ CAPTURE DEBUG: screenSize=${screenWidth}x${screenHeight}, expected bitmap=${screenWidth + rowPadding / pixelStride}x${screenHeight}") + // Create bitmap from image val bitmap = Bitmap.createBitmap( screenWidth + rowPadding / pixelStride, @@ -342,16 +357,40 @@ class ScreenCaptureService : Service() { ) bitmap.copyPixelsFromBuffer(buffer) + Log.d(TAG, "🖼️ CAPTURE DEBUG: created bitmap=${bitmap.width}x${bitmap.height}") + // Convert to cropped bitmap if needed val croppedBitmap = if (rowPadding == 0) { + Log.d(TAG, "🖼️ CAPTURE DEBUG: No padding, using original bitmap") bitmap } else { + Log.d(TAG, "🖼️ CAPTURE DEBUG: Cropping bitmap from ${bitmap.width}x${bitmap.height} to ${screenWidth}x${screenHeight}") Bitmap.createBitmap(bitmap, 0, 0, screenWidth, screenHeight) } + Log.d(TAG, "🖼️ CAPTURE DEBUG: final bitmap=${croppedBitmap.width}x${croppedBitmap.height}") + // Convert to OpenCV Mat for analysis val mat = Mat() Utils.bitmapToMat(croppedBitmap, mat) + + // DEBUG: Check color conversion + Log.d(TAG, "🎨 COLOR DEBUG: Mat type=${mat.type()}, channels=${mat.channels()}") + Log.d(TAG, "🎨 COLOR DEBUG: OpenCV expects BGR, Android Bitmap is ARGB") + + // Sample a center pixel to check color values + if (mat.rows() > 0 && mat.cols() > 0) { + val centerY = mat.rows() / 2 + val centerX = mat.cols() / 2 + val pixel = mat.get(centerY, centerX) + if (pixel != null && pixel.size >= 3) { + val b = pixel[0].toInt() + val g = pixel[1].toInt() + val r = pixel[2].toInt() + Log.d(TAG, "🎨 COLOR DEBUG: Center pixel (${centerX},${centerY}) BGR=($b,$g,$r) -> RGB=(${r},${g},${b})") + Log.d(TAG, "🎨 COLOR DEBUG: Center pixel hex = #${String.format("%02x%02x%02x", r, g, b)}") + } + } // Run YOLO analysis analyzePokemonScreen(mat) @@ -626,18 +665,33 @@ class ScreenCaptureService : Service() { if (detection == null) return null try { - // Validate and clip bounding box to image boundaries + // Expand bounding box by 5% for all OCR classes to improve text extraction accuracy val bbox = detection.boundingBox - val clippedX = kotlin.math.max(0, kotlin.math.min(bbox.x, mat.cols() - 1)) - val clippedY = kotlin.math.max(0, kotlin.math.min(bbox.y, mat.rows() - 1)) - val clippedWidth = kotlin.math.max(1, kotlin.math.min(bbox.width, mat.cols() - clippedX)) - val clippedHeight = kotlin.math.max(1, kotlin.math.min(bbox.height, mat.rows() - clippedY)) + val expansionFactor = 0.05f // 5% expansion + val 
widthExpansion = (bbox.width * expansionFactor).toInt() + val heightExpansion = (bbox.height * expansionFactor).toInt() + + val expandedBbox = Rect( + bbox.x - widthExpansion, + bbox.y - heightExpansion, + bbox.width + (2 * widthExpansion), + bbox.height + (2 * heightExpansion) + ) + + // Validate and clip bounding box to image boundaries + val clippedX = kotlin.math.max(0, kotlin.math.min(expandedBbox.x, mat.cols() - 1)) + val clippedY = kotlin.math.max(0, kotlin.math.min(expandedBbox.y, mat.rows() - 1)) + val clippedWidth = kotlin.math.max(1, kotlin.math.min(expandedBbox.width, mat.cols() - clippedX)) + val clippedHeight = kotlin.math.max(1, kotlin.math.min(expandedBbox.height, mat.rows() - clippedY)) val safeBbox = Rect(clippedX, clippedY, clippedWidth, clippedHeight) - // Debug logging for problematic bounding boxes - if (safeBbox.x != bbox.x || safeBbox.y != bbox.y || safeBbox.width != bbox.width || safeBbox.height != bbox.height) { - Log.w(TAG, "⚠️ Clipped bbox for ${detection.className}: original=[${bbox.x},${bbox.y},${bbox.width},${bbox.height}] → safe=[${safeBbox.x},${safeBbox.y},${safeBbox.width},${safeBbox.height}] (image: ${mat.cols()}x${mat.rows()})") + // Debug logging for bounding box transformations + if (expandedBbox != bbox) { + Log.d(TAG, "📏 Expanded bbox for ${detection.className}: [${bbox.x},${bbox.y},${bbox.width},${bbox.height}] → [${expandedBbox.x},${expandedBbox.y},${expandedBbox.width},${expandedBbox.height}]") + } + if (safeBbox.x != expandedBbox.x || safeBbox.y != expandedBbox.y || safeBbox.width != expandedBbox.width || safeBbox.height != expandedBbox.height) { + Log.w(TAG, "⚠️ Clipped bbox for ${detection.className}: expanded=[${expandedBbox.x},${expandedBbox.y},${expandedBbox.width},${expandedBbox.height}] → safe=[${safeBbox.x},${safeBbox.y},${safeBbox.width},${safeBbox.height}] (image: ${mat.cols()}x${mat.rows()})") } // Extract region of interest using safe bounding box @@ -1052,203 +1106,93 @@ class ScreenCaptureService : Service() { Log.i(TAG, "====================================") } - private fun createFloatingButton() { - try { - if (overlayButton != null) return // Already created - - windowManager = getSystemService(Context.WINDOW_SERVICE) as WindowManager - - // Create a container for multiple buttons - val buttonContainer = LinearLayout(this).apply { - orientation = LinearLayout.VERTICAL - setBackgroundColor(0x80000000.toInt()) // Semi-transparent black background - setPadding(8, 8, 8, 8) - } - - // Main detect button - val detectButton = Button(this).apply { - text = "🔍 DETECT" - textSize = 10f - setBackgroundColor(0xFF4CAF50.toInt()) // Green - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 80) - setOnClickListener { triggerManualDetection() } - } - - // Coordinate transform test buttons - val directButton = Button(this).apply { - text = "DIRECT" - textSize = 9f - setBackgroundColor(0xFF2196F3.toInt()) // Blue - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setCoordinateMode("DIRECT") - triggerManualDetection() - } - } - - val letterboxButton = Button(this).apply { - text = "LETTERBOX" - textSize = 9f - setBackgroundColor(0xFFFF9800.toInt()) // Orange - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setCoordinateMode("LETTERBOX") - triggerManualDetection() - } - } - - val hybridButton = Button(this).apply { - text = "HYBRID" - textSize = 9f - 
setBackgroundColor(0xFF9C27B0.toInt()) // Purple - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setCoordinateMode("HYBRID") - triggerManualDetection() - } - } - - // Class filter buttons for debugging - val shinyFilterButton = Button(this).apply { - text = "SHINY" - textSize = 8f - setBackgroundColor(0xFFFFD700.toInt()) // Gold - setTextColor(0xFF000000.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setClassFilter("shiny_icon") - triggerManualDetection() - } - } - - val pokeballFilterButton = Button(this).apply { - text = "POKEBALL" - textSize = 8f - setBackgroundColor(0xFFE91E63.toInt()) // Pink - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setClassFilter("ball_icon_cherishball") - triggerManualDetection() - } - } - - val allClassesButton = Button(this).apply { - text = "ALL" - textSize = 8f - setBackgroundColor(0xFF607D8B.toInt()) // Blue Grey - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.setClassFilter(null) // Show all classes - triggerManualDetection() - } - } - - val debugModeButton = Button(this).apply { - text = "DEBUG" - textSize = 8f - setBackgroundColor(0xFFFF5722.toInt()) // Deep Orange - setTextColor(0xFFFFFFFF.toInt()) - layoutParams = LinearLayout.LayoutParams(140, 60) - setOnClickListener { - YOLOOnnxDetector.toggleShowAllConfidences() - triggerManualDetection() - } + private fun convertImageToMat(image: Image): Mat? { + return try { + val planes = image.planes + val buffer = planes[0].buffer + val pixelStride = planes[0].pixelStride + val rowStride = planes[0].rowStride + val rowPadding = rowStride - pixelStride * screenWidth + + // Create bitmap from image + val bitmap = Bitmap.createBitmap( + screenWidth + rowPadding / pixelStride, + screenHeight, + Bitmap.Config.ARGB_8888 + ) + bitmap.copyPixelsFromBuffer(buffer) + + // Crop bitmap to remove padding if needed + val croppedBitmap = if (rowPadding == 0) { + bitmap + } else { + val cropped = Bitmap.createBitmap(bitmap, 0, 0, screenWidth, screenHeight) + bitmap.recycle() // Clean up original + cropped } + + // Convert bitmap to Mat + val mat = Mat() + Utils.bitmapToMat(croppedBitmap, mat) - buttonContainer.addView(detectButton) - buttonContainer.addView(directButton) - buttonContainer.addView(letterboxButton) - buttonContainer.addView(hybridButton) - buttonContainer.addView(shinyFilterButton) - buttonContainer.addView(pokeballFilterButton) - buttonContainer.addView(allClassesButton) - buttonContainer.addView(debugModeButton) - - overlayButton = buttonContainer - - val params = WindowManager.LayoutParams( - WindowManager.LayoutParams.WRAP_CONTENT, - WindowManager.LayoutParams.WRAP_CONTENT, - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { - WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY - } else { - @Suppress("DEPRECATION") - WindowManager.LayoutParams.TYPE_PHONE - }, - WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE, - PixelFormat.TRANSLUCENT - ).apply { - gravity = Gravity.TOP or Gravity.START - x = 100 - y = 200 - } + // Convert from RGBA to BGR (OpenCV format for proper color channel handling) + val bgrMat = Mat() + Imgproc.cvtColor(mat, bgrMat, Imgproc.COLOR_RGBA2BGR) - windowManager?.addView(overlayButton, params) - Log.d(TAG, "✅ Floating detection button created") + // Clean up + mat.release() + 
croppedBitmap.recycle() + bgrMat } catch (e: Exception) { - Log.e(TAG, "❌ Error creating floating button", e) - } - } - - private fun removeFloatingButton() { - try { - overlayButton?.let { button -> - windowManager?.removeView(button) - overlayButton = null - } - windowManager = null - Log.d(TAG, "🗑️ Floating button removed") - } catch (e: Exception) { - Log.e(TAG, "❌ Error removing floating button", e) + Log.e(TAG, "❌ Error converting image to Mat", e) + null } } - + private fun triggerManualDetection() { - Log.d(TAG, "🔍 Manual detection triggered!") + Log.d(TAG, "🔍 Manual detection triggered via MVC!") latestImage?.let { image -> try { - // Update main button to show processing (find the first button in the LinearLayout) - val mainButton = (overlayButton as? LinearLayout)?.getChildAt(0) as? Button - mainButton?.text = "⏳ PROCESSING..." - mainButton?.isEnabled = false + // Convert image to Mat for processing + val mat = convertImageToMat(image) - // Process the image - processImage(image) + if (mat != null) { + // Use controller to process detection (this will notify UI via callbacks) + val detections = detectionController.processDetection(mat) + + // Show detection overlay with results + if (detections.isNotEmpty()) { + showYOLODetectionOverlay(detections) + + // Extract Pokemon info using YOLO detections with OCR + extractPokemonInfoFromYOLOAsync(mat, detections) + } + + mat.release() + } else { + Log.e(TAG, "❌ Failed to convert image to Mat") + } // Close the image after processing to free the buffer image.close() latestImage = null - // Reset button after processing - handler.postDelayed({ - val resetButton = (overlayButton as? LinearLayout)?.getChildAt(0) as? Button - resetButton?.text = "🔍 DETECT" - resetButton?.isEnabled = true - }, 2000) - } catch (e: Exception) { Log.e(TAG, "❌ Error in manual detection", e) - val errorButton = (overlayButton as? LinearLayout)?.getChildAt(0) as? Button - errorButton?.text = "🔍 DETECT" - errorButton?.isEnabled = true } } ?: run { Log.w(TAG, "⚠️ No image available for detection") } } + override fun onDestroy() { super.onDestroy() hideDetectionOverlay() - removeFloatingButton() + floatingOrbUI?.hide() + detectionController.clearUICallbacks() yoloDetector?.release() ocrExecutor.shutdown() stopScreenCapture() diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt index 911464f..7ea6a35 100644 --- a/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt @@ -21,10 +21,10 @@ class YOLOOnnxDetector(private val context: Context) { private const val TAG = "YOLOOnnxDetector" private const val MODEL_FILE = "best.onnx" private const val INPUT_SIZE = 640 - private const val CONFIDENCE_THRESHOLD = 0.45f // Lowered to match .pt model detection levels + private const val CONFIDENCE_THRESHOLD = 0.55f private const val NMS_THRESHOLD = 0.3f // More aggressive merging of overlapping boxes private const val NUM_CHANNELS = 3 - private const val NUM_DETECTIONS = 8400 // YOLOv8 default + private const val NUM_DETECTIONS = 300 // ONNX model exported with NMS enabled private const val NUM_CLASSES = 95 // Your class count // Enhanced accuracy settings for ONNX (fixed input size) - WITH PER-METHOD COORDINATE TRANSFORM @@ -39,6 +39,7 @@ class YOLOOnnxDetector(private val context: Context) { var DEBUG_CLASS_FILTER: String? 
= null // Set to class name to show only that class var SHOW_ALL_CONFIDENCES = false // Show all detections with their confidences + fun setCoordinateMode(mode: String) { COORD_TRANSFORM_MODE = mode Log.i(TAG, "🔧 Coordinate transform mode changed to: $mode") @@ -402,6 +403,7 @@ class YOLOOnnxDetector(private val context: Context) { val outputTensor = result.get(0).value as Array> val flatOutput = outputTensor[0].flatMap { it.asIterable() }.toFloatArray() + // Post-process results with method-specific coordinate transformation val detections = postprocessWithMethod(flatOutput, inputMat.cols(), inputMat.rows(), INPUT_SIZE, method) @@ -774,6 +776,7 @@ class YOLOOnnxDetector(private val context: Context) { Log.d(TAG, "🔍 [DEBUG] Class: $className (ID: $classId), Confidence: %.3f, Original: %.3f".format(mappedConfidence, confidence)) } + // Apply class filtering if set val passesClassFilter = DEBUG_CLASS_FILTER == null || DEBUG_CLASS_FILTER == className @@ -1106,6 +1109,7 @@ class YOLOOnnxDetector(private val context: Context) { Log.d(TAG, "🔍 [DEBUG] Class: $className (ID: $classId), Confidence: %.3f, Original: %.3f".format(mappedConfidence, confidence)) } + // Apply class filtering if set val passesClassFilter = DEBUG_CLASS_FILTER == null || DEBUG_CLASS_FILTER == className diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/controllers/DetectionController.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/controllers/DetectionController.kt new file mode 100644 index 0000000..8c0af23 --- /dev/null +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/controllers/DetectionController.kt @@ -0,0 +1,144 @@ +package com.quillstudios.pokegoalshelper.controllers + +import android.util.Log +import com.quillstudios.pokegoalshelper.YOLOOnnxDetector +import com.quillstudios.pokegoalshelper.Detection +import com.quillstudios.pokegoalshelper.ui.interfaces.DetectionUIEvents +import com.quillstudios.pokegoalshelper.ui.interfaces.DetectionUICallbacks +import org.opencv.core.Mat + +/** + * Controller handling detection business logic. + * Decouples UI interactions from YOLO detection implementation. + */ +class DetectionController( + private val yoloDetector: YOLOOnnxDetector +) : DetectionUIEvents { + + companion object { + private const val TAG = "DetectionController" + } + + private var uiCallbacks: DetectionUICallbacks? = null + private var currentSettings = DetectionSettings() + + /** + * Register UI callbacks for status updates + */ + fun setUICallbacks(callbacks: DetectionUICallbacks) { + uiCallbacks = callbacks + } + + /** + * Remove UI callbacks (cleanup) + */ + fun clearUICallbacks() { + uiCallbacks = null + } + + // === DetectionUIEvents Implementation === + + override fun onDetectionRequested() { + Log.d(TAG, "🔍 Detection requested via controller") + // The actual detection will be triggered by the service layer + // This event will be handled by the service which has access to the image + detectionRequestCallback?.invoke() + } + + private var detectionRequestCallback: (() -> Unit)? = null + + /** + * Set callback for when UI requests detection + * This allows the service layer to handle the actual detection + */ + fun setDetectionRequestCallback(callback: () -> Unit) { + detectionRequestCallback = callback + } + + override fun onClassFilterChanged(className: String?) 
{ + Log.i(TAG, "🔍 Class filter changed to: ${className ?: "ALL CLASSES"}") + + currentSettings.classFilter = className + + // Apply filter to YOLO detector + YOLOOnnxDetector.setClassFilter(className) + + // Notify UI of settings change + uiCallbacks?.onSettingsChanged( + currentSettings.classFilter, + currentSettings.debugMode, + currentSettings.coordinateMode + ) + } + + override fun onDebugModeToggled() { + currentSettings.debugMode = !currentSettings.debugMode + Log.i(TAG, "📊 Debug mode toggled: ${currentSettings.debugMode}") + + // Apply debug mode to YOLO detector + YOLOOnnxDetector.toggleShowAllConfidences() + + // Notify UI of settings change + uiCallbacks?.onSettingsChanged( + currentSettings.classFilter, + currentSettings.debugMode, + currentSettings.coordinateMode + ) + } + + override fun onCoordinateModeChanged(mode: String) { + Log.i(TAG, "🔧 Coordinate mode changed to: $mode") + + currentSettings.coordinateMode = mode + + // Apply coordinate mode to YOLO detector + YOLOOnnxDetector.setCoordinateMode(mode) + + // Notify UI of settings change + uiCallbacks?.onSettingsChanged( + currentSettings.classFilter, + currentSettings.debugMode, + currentSettings.coordinateMode + ) + } + + // === Business Logic Methods === + + /** + * Process detection on the given image + * This will be called by the service layer + */ + fun processDetection(inputMat: Mat): List { + return try { + uiCallbacks?.onDetectionStarted() + + val detections = yoloDetector.detect(inputMat) + val detectionCount = detections.size + + Log.i(TAG, "✅ Detection completed: $detectionCount objects found") + uiCallbacks?.onDetectionCompleted(detectionCount) + + detections + } catch (e: Exception) { + Log.e(TAG, "❌ Detection failed", e) + uiCallbacks?.onDetectionFailed(e.message ?: "Unknown error") + emptyList() + } + } + + /** + * Get current detection settings + */ + fun getCurrentSettings(): DetectionSettings { + return currentSettings.copy() + } +} + +/** + * Data class representing current detection settings + */ +data class DetectionSettings( + var classFilter: String? = null, + var debugMode: Boolean = false, + var coordinateMode: String = "HYBRID" +) \ No newline at end of file diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/ui/FloatingOrbUI.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/ui/FloatingOrbUI.kt new file mode 100644 index 0000000..feb1761 --- /dev/null +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/ui/FloatingOrbUI.kt @@ -0,0 +1,270 @@ +package com.quillstudios.pokegoalshelper.ui + +import android.content.Context +import android.graphics.PixelFormat +import android.os.Build +import android.util.Log +import android.view.Gravity +import android.view.View +import android.view.ViewGroup +import android.view.WindowManager +import android.widget.Button +import android.widget.LinearLayout +import com.quillstudios.pokegoalshelper.ui.interfaces.DetectionUIEvents +import com.quillstudios.pokegoalshelper.ui.interfaces.DetectionUICallbacks + +/** + * Floating orb UI component that handles user interactions. + * Implements CalcIV-style expandable menu system. + * + * This is pure UI logic - no business logic or direct detector calls. 
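+ * User actions are forwarded out through [DetectionUIEvents]; processing
+ * status flows back in through the [DetectionUICallbacks] methods
+ * implemented below.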
+ */ +class FloatingOrbUI( + private val context: Context, + private val detectionEvents: DetectionUIEvents +) : DetectionUICallbacks { + + companion object { + private const val TAG = "FloatingOrbUI" + private const val ORB_SIZE = 120 + private const val MENU_BUTTON_WIDTH = 160 + private const val MENU_BUTTON_HEIGHT = 60 + } + + private var windowManager: WindowManager? = null + private var orbButton: View? = null + private var expandedMenu: View? = null + private var isMenuExpanded = false + private var isProcessing = false + + /** + * Initialize and show the floating orb + */ + fun show() { + try { + if (orbButton != null) return // Already shown + + windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager + createFloatingOrb() + Log.d(TAG, "✅ Floating orb UI shown") + + } catch (e: Exception) { + Log.e(TAG, "❌ Error showing floating orb", e) + } + } + + /** + * Hide and cleanup the floating orb + */ + fun hide() { + try { + if (isMenuExpanded) { + collapseMenu() + } + + orbButton?.let { + windowManager?.removeView(it) + orbButton = null + } + + windowManager = null + Log.d(TAG, "🗑️ Floating orb UI hidden") + + } catch (e: Exception) { + Log.e(TAG, "❌ Error hiding floating orb", e) + } + } + + // === DetectionUICallbacks Implementation === + + override fun onDetectionStarted() { + isProcessing = true + updateOrbAppearance() + + // Auto-collapse menu during processing + if (isMenuExpanded) { + collapseMenu() + } + } + + override fun onDetectionCompleted(detectionCount: Int) { + isProcessing = false + updateOrbAppearance() + Log.d(TAG, "🎯 Detection completed: $detectionCount objects") + } + + override fun onDetectionFailed(error: String) { + isProcessing = false + updateOrbAppearance() + Log.e(TAG, "❌ Detection failed: $error") + } + + override fun onSettingsChanged(filterClass: String?, debugMode: Boolean, coordinateMode: String) { + Log.d(TAG, "⚙️ Settings updated - Filter: $filterClass, Debug: $debugMode, Mode: $coordinateMode") + // UI could update visual indicators here if needed + } + + // === Private UI Methods === + + private fun createFloatingOrb() { + orbButton = Button(context).apply { + text = "🎯" + textSize = 20f + setBackgroundResource(android.R.drawable.btn_default) + background.setTint(0xFF4CAF50.toInt()) // Green + setTextColor(0xFFFFFFFF.toInt()) + + width = ORB_SIZE + height = ORB_SIZE + layoutParams = ViewGroup.LayoutParams(ORB_SIZE, ORB_SIZE) + + setOnClickListener { handleOrbClick() } + } + + val params = WindowManager.LayoutParams( + ORB_SIZE, ORB_SIZE, + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY + } else { + @Suppress("DEPRECATION") + WindowManager.LayoutParams.TYPE_PHONE + }, + WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE, + PixelFormat.TRANSLUCENT + ).apply { + gravity = Gravity.TOP or Gravity.START + x = 50 + y = 200 + } + + windowManager?.addView(orbButton, params) + } + + private fun handleOrbClick() { + if (isProcessing) { + Log.d(TAG, "⚠️ Ignoring click - detection in progress") + return + } + + if (isMenuExpanded) { + collapseMenu() + } else { + expandMenu() + } + } + + private fun expandMenu() { + if (isMenuExpanded || isProcessing) return + + val menuContainer = LinearLayout(context).apply { + orientation = LinearLayout.VERTICAL + setBackgroundColor(0xE0000000.toInt()) // Semi-transparent black + setPadding(16, 16, 16, 16) + } + + // Define menu options with their actions + val menuItems = listOf( + MenuOption("🔍 DETECT", 0xFF4CAF50.toInt()) { + 
detectionEvents.onDetectionRequested() + }, + MenuOption("SHINY", 0xFFFFD700.toInt()) { + detectionEvents.onClassFilterChanged("shiny_icon") + detectionEvents.onDetectionRequested() + }, + MenuOption("POKEBALL", 0xFFE91E63.toInt()) { + detectionEvents.onClassFilterChanged("ball_icon_cherishball") + detectionEvents.onDetectionRequested() + }, + MenuOption("ALL", 0xFF607D8B.toInt()) { + detectionEvents.onClassFilterChanged(null) + detectionEvents.onDetectionRequested() + }, + MenuOption("DEBUG", 0xFFFF5722.toInt()) { + detectionEvents.onDebugModeToggled() + detectionEvents.onDetectionRequested() + } + ) + + menuItems.forEach { option -> + val button = createMenuButton(option) + menuContainer.addView(button) + } + + expandedMenu = menuContainer + + val params = WindowManager.LayoutParams( + WindowManager.LayoutParams.WRAP_CONTENT, + WindowManager.LayoutParams.WRAP_CONTENT, + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY + } else { + @Suppress("DEPRECATION") + WindowManager.LayoutParams.TYPE_PHONE + }, + WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE, + PixelFormat.TRANSLUCENT + ).apply { + gravity = Gravity.TOP or Gravity.START + x = 180 // Position next to the orb + y = 200 + } + + windowManager?.addView(expandedMenu, params) + isMenuExpanded = true + updateOrbAppearance() + } + + private fun collapseMenu() { + if (!isMenuExpanded) return + + expandedMenu?.let { windowManager?.removeView(it) } + expandedMenu = null + isMenuExpanded = false + updateOrbAppearance() + } + + private fun createMenuButton(option: MenuOption): Button { + return Button(context).apply { + text = option.text + textSize = 14f // Increased text size + setBackgroundColor(option.color) + setTextColor(0xFFFFFFFF.toInt()) + setPadding(8, 4, 8, 4) // Add padding for better text spacing + layoutParams = LinearLayout.LayoutParams(MENU_BUTTON_WIDTH, MENU_BUTTON_HEIGHT).apply { + setMargins(0, 0, 0, 8) + } + setOnClickListener { + option.action() + collapseMenu() + } + } + } + + private fun updateOrbAppearance() { + (orbButton as? Button)?.apply { + when { + isProcessing -> { + text = "⏳" + background.setTint(0xFFFF9800.toInt()) // Orange + } + isMenuExpanded -> { + text = "✖" + background.setTint(0xFFFF5722.toInt()) // Orange-red + } + else -> { + text = "🎯" + background.setTint(0xFF4CAF50.toInt()) // Green + } + } + } + } + + /** + * Data class for menu options + */ + private data class MenuOption( + val text: String, + val color: Int, + val action: () -> Unit + ) +} \ No newline at end of file diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/ui/interfaces/DetectionUIEvents.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/ui/interfaces/DetectionUIEvents.kt new file mode 100644 index 0000000..287bf72 --- /dev/null +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/ui/interfaces/DetectionUIEvents.kt @@ -0,0 +1,60 @@ +package com.quillstudios.pokegoalshelper.ui.interfaces + +/** + * Interface for UI events related to detection functionality. + * UI components implement this to communicate with business logic controllers. + */ +interface DetectionUIEvents { + /** + * Triggered when user requests manual detection + */ + fun onDetectionRequested() + + /** + * Triggered when user changes class filter + * @param className Name of class to filter, or null for all classes + */ + fun onClassFilterChanged(className: String?) 
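+    // DetectionController forwards this filter to YOLOOnnxDetector.setClassFilter();
+    // a null className restores all classes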
+ + /** + * Triggered when user toggles debug mode + */ + fun onDebugModeToggled() + + /** + * Triggered when user changes coordinate transformation mode + * @param mode Transformation mode (DIRECT, LETTERBOX, HYBRID) + */ + fun onCoordinateModeChanged(mode: String) +} + +/** + * Interface for callbacks from business logic back to UI. + * UI components implement this to receive status updates. + */ +interface DetectionUICallbacks { + /** + * Called when detection starts processing + */ + fun onDetectionStarted() + + /** + * Called when detection completes successfully + * @param detectionCount Number of objects detected + */ + fun onDetectionCompleted(detectionCount: Int) + + /** + * Called when detection fails + * @param error Error message + */ + fun onDetectionFailed(error: String) + + /** + * Called when settings change + * @param filterClass Current class filter (null if showing all) + * @param debugMode Current debug mode state + * @param coordinateMode Current coordinate transformation mode + */ + fun onSettingsChanged(filterClass: String?, debugMode: Boolean, coordinateMode: String) +} \ No newline at end of file diff --git a/raw_models/best.onnx b/raw_models/best.onnx new file mode 100644 index 0000000..9bb002e Binary files /dev/null and b/raw_models/best.onnx differ diff --git a/raw_models/best.pt b/raw_models/best.pt new file mode 100644 index 0000000..ee640f4 Binary files /dev/null and b/raw_models/best.pt differ diff --git a/tools/debug_scripts/README.md b/tools/debug_scripts/README.md new file mode 100644 index 0000000..85221a0 --- /dev/null +++ b/tools/debug_scripts/README.md @@ -0,0 +1,51 @@ +# Debug Scripts for YOLO ONNX Detection + +This directory contains debugging tools for troubleshooting YOLO object detection issues. + +## Setup + +1. Create a Python virtual environment: +```bash +python -m venv debug_env +source debug_env/bin/activate # On Windows: debug_env\Scripts\activate +``` + +2. Install dependencies: +```bash +pip install -r requirements.txt +``` + +## Scripts + +### `debug_model_comparison.py` +Compares .pt model predictions with ONNX model outputs on the same static test image. +- Tests both PyTorch and ONNX models side-by-side +- Provides detailed debug output including preprocessing steps +- Useful for identifying model export issues + +### `test_static_onnx.py` +Tests ONNX model against static images to isolate Android capture issues. +- Bypasses Android screen capture pipeline +- Tests multiple ONNX model variants +- Good for validating model functionality + +### `export_model_variants.py` +Exports YOLO model variants with different NMS settings. +- Creates models with different confidence/IoU thresholds +- Useful for debugging detection sensitivity issues + +### `inspect_onnx_model.py` +Inspects ONNX model structure and metadata. +- Verifies class mappings and model architecture +- Helpful for debugging model export problems + +## Usage + +Place test images in `../../test_images/` and ensure model files are in `../../raw_models/`. 
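+
+For example, to stage a screenshot for `debug_model_comparison.py` (a sketch; the source path below is hypothetical, and `test_images/shiny_test.jpg` is one of the filenames the script searches for):
+
+```bash
+mkdir -p test_images
+cp ~/Pictures/shiny_screenshot.jpg test_images/shiny_test.jpg
+```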
+
+Example (run from the repository root, since the scripts resolve `raw_models/` and `test_images/` paths relative to it):
+```bash
+source tools/debug_scripts/debug_env/bin/activate
+python tools/debug_scripts/debug_model_comparison.py
+```
\ No newline at end of file
diff --git a/tools/debug_scripts/debug_model_comparison.py b/tools/debug_scripts/debug_model_comparison.py
new file mode 100644
index 0000000..6bbcffe
--- /dev/null
+++ b/tools/debug_scripts/debug_model_comparison.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+Compare .pt model predictions with ONNX model outputs on the same static test image
+Provides detailed debug output to identify differences in preprocessing and inference
+"""
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+import onnxruntime as ort
+import torch
+import os
+from pathlib import Path
+
+# Force CPU-only execution to avoid CUDA compatibility issues
+os.environ['CUDA_VISIBLE_DEVICES'] = ''
+torch.cuda.is_available = lambda: False
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    """Letterbox preprocessing - exact copy of YOLO preprocessing"""
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return im, ratio, (dw, dh)
+
+def preprocess_image(image_path, target_size=(640, 640)):
+    """Preprocess image for ONNX model - matches Android preprocessing"""
+    print(f"📸 Loading image: {image_path}")
+
+    # Load image
+    img = cv2.imread(str(image_path))
+    if img is None:
+        raise ValueError(f"Could not load image: {image_path}")
+
+    print(f"   Original size: {img.shape}")
+
+    # Apply letterbox (same as YOLO preprocessing)
+    img_processed, ratio, pad = letterbox(img, target_size)
+    print(f"   Letterboxed size: {img_processed.shape}")
+    print(f"   Scale ratio: {ratio}")
+    print(f"   Padding (dw, dh): {pad}")
+
+    # Convert BGR to RGB
+    img_rgb = cv2.cvtColor(img_processed, cv2.COLOR_BGR2RGB)
+
+    # Normalize to [0, 1] and convert to CHW format
+    img_normalized = img_rgb.astype(np.float32) / 255.0
+    img_chw = np.transpose(img_normalized, (2, 0, 1))
+    img_batch = np.expand_dims(img_chw, axis=0)
+
+    print(f"   Final tensor shape: {img_batch.shape}")
+    print(f"   Value range: [{img_batch.min():.3f}, {img_batch.max():.3f}]")
+
+    return img_batch, img, ratio, pad
+
+def run_pt_model(model_path, image_path):
+    """Run .pt model prediction with full debug output"""
+    print("\n🔥 Running .pt model prediction:")
+    print(f"   Model: {model_path}")
+
+    # Load model
+    model = YOLO(model_path)
+
+    # Run prediction with verbose output
+    results
= model.predict( + source=str(image_path), + conf=0.01, # Very low confidence to catch everything + iou=0.5, + max_det=1000, + verbose=True, + save=False + ) + + result = results[0] + print(f" Found {len(result.boxes)} detections") + + # Extract raw data + if len(result.boxes) > 0: + boxes = result.boxes.xyxy.cpu().numpy() # x1, y1, x2, y2 + confidences = result.boxes.conf.cpu().numpy() + classes = result.boxes.cls.cpu().numpy().astype(int) + + print(f"\n📊 .pt Model Results Summary:") + print(f" Total detections: {len(boxes)}") + + # Group by class + class_counts = {} + for cls_id in classes: + class_counts[cls_id] = class_counts.get(cls_id, 0) + 1 + + print(f" Classes found: {sorted(class_counts.keys())}") + + # Focus on shiny icon (class 50) + shiny_detections = [(i, conf) for i, (cls_id, conf) in enumerate(zip(classes, confidences)) if cls_id == 50] + if shiny_detections: + print(f"\n✨ SHINY ICON DETECTIONS (Class 50):") + for i, conf in shiny_detections: + box = boxes[i] + print(f" Detection {i}: conf={conf:.6f}, box=[{box[0]:.1f},{box[1]:.1f},{box[2]:.1f},{box[3]:.1f}]") + else: + print(f"\n❌ NO SHINY ICON DETECTIONS (Class 50)") + + # Show all detections with confidence > 0.1 + high_conf_detections = [(i, cls_id, conf) for i, (cls_id, conf) in enumerate(zip(classes, confidences)) if conf > 0.1] + if high_conf_detections: + print(f"\n🎯 High confidence detections (>0.1):") + for i, cls_id, conf in high_conf_detections[:10]: # Show top 10 + box = boxes[i] + print(f" Class {cls_id}: conf={conf:.6f}, box=[{box[0]:.1f},{box[1]:.1f},{box[2]:.1f},{box[3]:.1f}]") + + return boxes, confidences, classes + else: + print(f"\n❌ NO DETECTIONS FOUND") + return None, None, None + +def run_onnx_model(model_path, preprocessed_img): + """Run ONNX model inference with full debug output""" + print(f"\n🔧 Running ONNX model inference:") + print(f" Model: {model_path}") + + # Load ONNX model + session = ort.InferenceSession(str(model_path)) + + # Get model info + input_name = session.get_inputs()[0].name + output_names = [output.name for output in session.get_outputs()] + + print(f" Input name: {input_name}") + print(f" Output names: {output_names}") + print(f" Input shape: {preprocessed_img.shape}") + + # Run inference + outputs = session.run(output_names, {input_name: preprocessed_img}) + + print(f" Number of outputs: {len(outputs)}") + for i, output in enumerate(outputs): + print(f" Output {i} shape: {output.shape}") + + # Process main output (should be detections) + detections = outputs[0] # Usually the first output contains detections + + if len(detections.shape) == 3: + batch_size, num_detections, num_values = detections.shape + print(f" Detections tensor: [{batch_size}, {num_detections}, {num_values}]") + + # Extract detections from batch + detection_data = detections[0] # Remove batch dimension + + if num_values == 6: # NMS format: [x, y, w, h, conf, class] + print(f" Format: NMS output (x, y, w, h, conf, class)") + + # Count valid detections (non-zero confidence) + valid_mask = detection_data[:, 4] > 0.000001 # conf > 0 + valid_detections = detection_data[valid_mask] + + print(f" Valid detections: {len(valid_detections)} / {num_detections}") + + if len(valid_detections) > 0: + confidences = valid_detections[:, 4] + classes = valid_detections[:, 5].astype(int) + + # Group by class + class_counts = {} + for cls_id in classes: + class_counts[cls_id] = class_counts.get(cls_id, 0) + 1 + + print(f" Classes found: {sorted(class_counts.keys())}") + + # Focus on shiny icon (class 50) + shiny_mask = classes 
== 50 + shiny_detections = valid_detections[shiny_mask] + + if len(shiny_detections) > 0: + print(f"\n✨ SHINY ICON DETECTIONS (Class 50): {len(shiny_detections)}") + for i, det in enumerate(shiny_detections): + print(f" Detection {i}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]") + else: + print(f"\n❌ NO SHINY ICON DETECTIONS (Class 50)") + + # Show high confidence detections + high_conf_mask = confidences > 0.1 + high_conf_detections = valid_detections[high_conf_mask] + + if len(high_conf_detections) > 0: + print(f"\n🎯 High confidence detections (>0.1): {len(high_conf_detections)}") + for i, det in enumerate(high_conf_detections[:10]): # Show top 10 + print(f" Class {int(det[5])}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]") + + return valid_detections + + elif num_values > 80: # Raw format: [x, y, w, h, obj, class0, class1, ...] + print(f" Format: Raw output ({num_values-5} classes)") + + # This would need more complex processing for raw outputs + print(f" ⚠️ Raw format detected - would need objectness * class confidence processing") + return None + + else: + print(f" ⚠️ Unexpected output shape: {detections.shape}") + return None + +def compare_models(pt_model_path, onnx_model_path, test_image_path): + """Compare .pt and ONNX model outputs on the same image""" + print("="*80) + print("🔍 MODEL COMPARISON DEBUG SESSION") + print("="*80) + + # Check if files exist + for path, name in [(pt_model_path, ".pt model"), (onnx_model_path, "ONNX model"), (test_image_path, "test image")]: + if not Path(path).exists(): + print(f"❌ {name} not found: {path}") + return + + # Preprocess image for ONNX + try: + preprocessed_img, original_img, ratio, pad = preprocess_image(test_image_path) + except Exception as e: + print(f"❌ Failed to preprocess image: {e}") + return + + # Run .pt model + try: + pt_boxes, pt_confidences, pt_classes = run_pt_model(pt_model_path, test_image_path) + except Exception as e: + print(f"❌ Failed to run .pt model: {e}") + pt_boxes, pt_confidences, pt_classes = None, None, None + + # Run ONNX model + try: + onnx_detections = run_onnx_model(onnx_model_path, preprocessed_img) + except Exception as e: + print(f"❌ Failed to run ONNX model: {e}") + onnx_detections = None + + # Compare results + print("\n" + "="*80) + print("📊 COMPARISON SUMMARY") + print("="*80) + + # Count shiny detections + pt_shiny_count = 0 + onnx_shiny_count = 0 + + if pt_classes is not None: + pt_shiny_count = np.sum(pt_classes == 50) + + if onnx_detections is not None and len(onnx_detections) > 0: + if onnx_detections.shape[1] == 6: # NMS format + onnx_classes = onnx_detections[:, 5].astype(int) + onnx_shiny_count = np.sum(onnx_classes == 50) + + print(f"🔥 .pt Model Results:") + print(f" Total detections: {len(pt_boxes) if pt_boxes is not None else 0}") + print(f" Shiny icons (class 50): {pt_shiny_count}") + + print(f"\n🔧 ONNX Model Results:") + print(f" Total detections: {len(onnx_detections) if onnx_detections is not None else 0}") + print(f" Shiny icons (class 50): {onnx_shiny_count}") + + if pt_shiny_count > 0 and onnx_shiny_count == 0: + print(f"\n🚨 ISSUE CONFIRMED: .pt model finds {pt_shiny_count} shiny icons, ONNX finds 0") + print(f" This confirms the preprocessing/inference discrepancy") + elif pt_shiny_count == onnx_shiny_count and pt_shiny_count > 0: + print(f"\n✅ Both models find {pt_shiny_count} shiny icons - issue may be elsewhere") + + print("\n" + "="*80) + +if __name__ == "__main__": + # Test with available models and image + 
pt_model = "raw_models/best.pt" + + # Test multiple ONNX variants + onnx_models = [ + "app/src/main/assets/best.onnx", + "raw_models/exports/best_no_nms.onnx", + "raw_models/exports/best_nms_relaxed.onnx", + "raw_models/exports/best_nms_very_relaxed.onnx" + ] + + # You'll need to provide a test image with known shiny icon + test_image = "test_images/shiny_test.jpg" # Replace with actual test image path + + print("🔍 Looking for test images...") + + # Try to find a suitable test image + test_image_candidates = [ + "test_images/shiny_test.jpg", + "test_images/test.jpg", + "screenshots/shiny.jpg", + "screenshots/test.png" + ] + + test_image_found = None + for candidate in test_image_candidates: + if Path(candidate).exists(): + test_image_found = candidate + print(f" Found test image: {candidate}") + break + + if not test_image_found: + print("❌ No test image found. Please provide a test image with shiny icon at one of these paths:") + for candidate in test_image_candidates: + print(f" {candidate}") + print("\nYou can capture a screenshot with shiny icon and save it as test_images/shiny_test.jpg") + exit(1) + + # Run comparison for each ONNX model + for onnx_model in onnx_models: + if Path(onnx_model).exists(): + print(f"\n🔄 Testing ONNX model: {onnx_model}") + compare_models(pt_model, onnx_model, test_image_found) + print("\n" + "="*120 + "\n") + else: + print(f"⚠️ ONNX model not found: {onnx_model}") \ No newline at end of file diff --git a/tools/debug_scripts/export_model_variants.py b/tools/debug_scripts/export_model_variants.py new file mode 100644 index 0000000..e9b5291 --- /dev/null +++ b/tools/debug_scripts/export_model_variants.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Export YOLO model variants with different NMS settings for shiny icon debugging +""" + +from ultralytics import YOLO +import os + +def export_model_variants(): + model_path = "./raw_models/best.pt" + output_dir = "./raw_models/exports" + + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + print(f"Loading model from: {model_path}") + model = YOLO(model_path) + + # Export configurations to test + configs = [ + { + "name": "no_nms", + "nms": False, + "simplify": True, + "description": "Raw model output without NMS - for debugging shiny detection" + }, + { + "name": "nms_relaxed", + "nms": True, + "max_det": 500, # Increase from default 300 + "conf": 0.1, # Lower confidence threshold + "simplify": True, + "description": "NMS with more detections and lower confidence" + }, + { + "name": "nms_very_relaxed", + "nms": True, + "max_det": 1000, # Even more detections + "conf": 0.05, # Very low confidence + "simplify": True, + "description": "NMS with maximum detections for rare classes" + } + ] + + for config in configs: + try: + print(f"\n🚀 Exporting {config['name']}: {config['description']}") + + # Extract export parameters + export_params = {k: v for k, v in config.items() + if k not in ['name', 'description']} + + # Export model + exported_path = model.export( + format='onnx', + **export_params + ) + + # Move to organized location + output_file = os.path.join(output_dir, f"best_{config['name']}.onnx") + if os.path.exists(exported_path): + os.rename(exported_path, output_file) + print(f"✅ Exported: {output_file}") + else: + print(f"❌ Export failed for {config['name']}") + + except Exception as e: + print(f"❌ Error exporting {config['name']}: {e}") + + print(f"\n📁 All exports saved to: {output_dir}") + print("\n📋 Summary:") + print("- best_no_nms.onnx: Raw 8400x99 output for debugging") + print("- 
+    for config in configs:
+        try:
+            print(f"\n🚀 Exporting {config['name']}: {config['description']}")
+
+            # Extract export parameters
+            export_params = {k: v for k, v in config.items()
+                             if k not in ['name', 'description']}
+
+            # Export model
+            exported_path = model.export(
+                format='onnx',
+                **export_params
+            )
+
+            # Move to organized location
+            output_file = os.path.join(output_dir, f"best_{config['name']}.onnx")
+            if os.path.exists(exported_path):
+                os.rename(exported_path, output_file)
+                print(f"✅ Exported: {output_file}")
+            else:
+                print(f"❌ Export failed for {config['name']}")
+
+        except Exception as e:
+            print(f"❌ Error exporting {config['name']}: {e}")
+
+    print(f"\n📁 All exports saved to: {output_dir}")
+    print("\n📋 Summary:")
+    print("- best_no_nms.onnx: Raw 8400x99 output for debugging")
+    print("- best_nms_relaxed.onnx: NMS with 500 max detections")
+    print("- best_nms_very_relaxed.onnx: NMS with 1000 max detections")
+    print("\nNext: Copy desired model to app/src/main/assets/ as best.onnx")
+
+if __name__ == "__main__":
+    export_model_variants()
\ No newline at end of file
diff --git a/tools/debug_scripts/inspect_onnx_model.py b/tools/debug_scripts/inspect_onnx_model.py
new file mode 100644
index 0000000..5cda823
--- /dev/null
+++ b/tools/debug_scripts/inspect_onnx_model.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""
+Inspect ONNX model structure to verify class mappings
+"""
+
+import onnx
+import numpy as np
+
+def inspect_onnx_model(model_path):
+    print(f"Inspecting ONNX model: {model_path}")
+
+    try:
+        # Load the model
+        model = onnx.load(model_path)
+
+        print(f"\n📋 Model Info:")
+        print(f"IR Version: {model.ir_version}")
+        print(f"Producer: {model.producer_name} {model.producer_version}")
+
+        # Check inputs
+        print(f"\n📥 Inputs:")
+        for input_info in model.graph.input:
+            print(f"   {input_info.name}: {[d.dim_value for d in input_info.type.tensor_type.shape.dim]}")
+
+        # Check outputs
+        print(f"\n📤 Outputs:")
+        for output_info in model.graph.output:
+            shape = [d.dim_value for d in output_info.type.tensor_type.shape.dim]
+            print(f"   {output_info.name}: {shape}")
+
+            # For NMS models, try to interpret the output format
+            if len(shape) == 3 and shape[2] == 6:
+                print(f"   → NMS format: [batch, {shape[1]} detections, 6 values (x,y,w,h,conf,class)]")
+            elif len(shape) == 3 and shape[1] > 90:
+                print(f"   → Raw format: [batch, {shape[1]} channels, {shape[2]} anchors]")
+                print(f"   → Channels: 4 coords + {shape[1]-4} classes")
+
+        # Check for any metadata about classes
+        print(f"\n🏷️ Metadata:")
+        for prop in model.metadata_props:
+            print(f"   {prop.key}: {prop.value}")
+
+        print(f"\n🔍 Model Summary: {len(model.graph.node)} nodes, {len(model.graph.initializer)} initializers")
+
+    except Exception as e:
+        print(f"❌ Error inspecting model: {e}")
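+# --- Hypothetical helper (not in the original script) -----------------------
+# Ultralytics ONNX exports typically embed the class-name mapping in the model
+# metadata under the key "names" as a stringified dict. If present, it lets us
+# confirm that class id 50 really maps to shiny_icon in the exported graph.
+# The key name and value format are assumptions about the exporter, so this
+# falls back gracefully when the property is missing.
+def print_class_mapping(model_path, class_id=50):
+    import ast
+    model = onnx.load(model_path)
+    names_prop = next((p.value for p in model.metadata_props if p.key == "names"), None)
+    if names_prop is None:
+        print("   No 'names' metadata found in this export")
+        return
+    names = ast.literal_eval(names_prop)  # dict of {class_id: name}; expected to contain 50: 'shiny_icon'
+    print(f"   Class {class_id}: {names.get(class_id, '<not present>')}")
+# -----------------------------------------------------------------------------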
+if __name__ == "__main__":
+    models_to_check = [
+        "app/src/main/assets/best.onnx",
+        "raw_models/exports/best_no_nms.onnx",
+        "raw_models/exports/best_nms_relaxed.onnx",
+        "raw_models/exports/best_nms_very_relaxed.onnx"
+    ]
+
+    for model_path in models_to_check:
+        try:
+            inspect_onnx_model(model_path)
+            print("\n" + "="*60 + "\n")
+        except FileNotFoundError:
+            print(f"⚠️ Model not found: {model_path}\n")
\ No newline at end of file
diff --git a/tools/debug_scripts/requirements.txt b/tools/debug_scripts/requirements.txt
new file mode 100644
index 0000000..8cd6af7
--- /dev/null
+++ b/tools/debug_scripts/requirements.txt
@@ -0,0 +1,6 @@
+ultralytics>=8.0.0
+opencv-python>=4.5.0
+onnxruntime>=1.15.0
+onnx>=1.14.0
+numpy>=1.21.0
+Pillow>=8.0.0
\ No newline at end of file
diff --git a/tools/debug_scripts/test_static_onnx.py b/tools/debug_scripts/test_static_onnx.py
new file mode 100644
index 0000000..76d67d9
--- /dev/null
+++ b/tools/debug_scripts/test_static_onnx.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""
+Test an ONNX model against static images to isolate OpenCV capture issues
+This bypasses Android screen capture and tests pure ONNX inference
+"""
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+import os
+from pathlib import Path
+
+# Force CPU-only execution to avoid CUDA compatibility issues
+os.environ['CUDA_VISIBLE_DEVICES'] = ''
+
+def letterbox_preprocess(img, target_size=(640, 640)):
+    """Exact letterbox preprocessing matching the Android implementation"""
+    h, w = img.shape[:2]
+
+    # Calculate scale factor
+    scale = min(target_size[0] / h, target_size[1] / w)
+
+    # Calculate new dimensions
+    new_w = int(w * scale)
+    new_h = int(h * scale)
+
+    # Resize image
+    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+
+    # Create padded image
+    padded = np.full((target_size[0], target_size[1], 3), 114, dtype=np.uint8)
+
+    # Calculate padding offsets
+    pad_x = (target_size[1] - new_w) // 2
+    pad_y = (target_size[0] - new_h) // 2
+
+    # Place resized image in center
+    padded[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized
+
+    return padded, scale, (pad_x, pad_y)
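+# --- Hypothetical helper (not in the original script) -----------------------
+# Boxes predicted on the 640x640 letterboxed input live in padded coordinates,
+# while the .pt model reports boxes in original-image coordinates. To compare
+# the two directly, the ONNX boxes must be un-letterboxed using the scale and
+# padding returned above. `unletterbox_boxes` is a made-up name and assumes
+# [cx, cy, w, h] boxes, matching the NMS output format handled below.
+def unletterbox_boxes(boxes_xywh, scale, padding):
+    """Map [cx, cy, w, h] boxes from letterboxed space back to the original image."""
+    pad_x, pad_y = padding
+    out = boxes_xywh.astype(np.float32).copy()
+    out[:, 0] = (out[:, 0] - pad_x) / scale   # center x
+    out[:, 1] = (out[:, 1] - pad_y) / scale   # center y
+    out[:, 2] /= scale                        # width
+    out[:, 3] /= scale                        # height
+    return out
+# -----------------------------------------------------------------------------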
+def test_onnx_static(model_path, image_path, confidence_threshold=0.01):
+    """Test an ONNX model on a static image with detailed output"""
+    print(f"🔧 Testing ONNX model: {Path(model_path).name}")
+    print(f"📸 Image: {Path(image_path).name}")
+
+    # Load image
+    img = cv2.imread(str(image_path))
+    if img is None:
+        print(f"❌ Could not load image: {image_path}")
+        return None
+
+    print(f"   Original image size: {img.shape}")
+
+    # Preprocess
+    processed_img, scale, padding = letterbox_preprocess(img)
+    print(f"   Processed size: {processed_img.shape}")
+    print(f"   Scale factor: {scale:.4f}")
+    print(f"   Padding (x, y): {padding}")
+
+    # Convert for ONNX (RGB, normalize, CHW, batch)
+    img_rgb = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
+    img_norm = img_rgb.astype(np.float32) / 255.0
+    img_chw = np.transpose(img_norm, (2, 0, 1))
+    img_batch = np.expand_dims(img_chw, axis=0)
+
+    print(f"   Final tensor: {img_batch.shape}, range: [{img_batch.min():.3f}, {img_batch.max():.3f}]")
+
+    # Load ONNX model
+    try:
+        session = ort.InferenceSession(str(model_path))
+        input_name = session.get_inputs()[0].name
+        print(f"   Model loaded, input: {input_name}")
+    except Exception as e:
+        print(f"❌ Failed to load ONNX model: {e}")
+        return None
+
+    # Run inference
+    try:
+        outputs = session.run(None, {input_name: img_batch})
+        print(f"   Inference successful, {len(outputs)} outputs")
+    except Exception as e:
+        print(f"❌ Inference failed: {e}")
+        return None
+
+    # Process outputs
+    if len(outputs) == 0:
+        print(f"❌ No outputs from model")
+        return None
+
+    detections = outputs[0]
+    print(f"   Detection tensor shape: {detections.shape}")
+
+    if len(detections.shape) != 3:
+        print(f"❌ Unexpected detection shape: {detections.shape}")
+        return None
+
+    batch_size, num_detections, num_values = detections.shape
+    detection_data = detections[0]  # Remove batch dimension
+
+    if num_values == 6:  # NMS format
+        print(f"   Format: NMS (x, y, w, h, conf, class)")
+
+        # Filter by confidence
+        valid_mask = detection_data[:, 4] > confidence_threshold
+        valid_detections = detection_data[valid_mask]
+
+        print(f"   Valid detections (conf > {confidence_threshold}): {len(valid_detections)}")
+
+        if len(valid_detections) == 0:
+            print(f"   ❌ No detections above confidence threshold")
+            return []
+
+        # Analyze by class
+        classes = valid_detections[:, 5].astype(int)
+        confidences = valid_detections[:, 4]
+
+        class_counts = {}
+        for cls_id in classes:
+            class_counts[cls_id] = class_counts.get(cls_id, 0) + 1
+
+        print(f"   Classes detected: {sorted(class_counts.keys())}")
+
+        # Focus on shiny icons (class 50)
+        shiny_mask = classes == 50
+        shiny_detections = valid_detections[shiny_mask]
+
+        if len(shiny_detections) > 0:
+            print(f"   ✨ SHINY ICONS FOUND: {len(shiny_detections)}")
+            for i, det in enumerate(shiny_detections):
+                x, y, w, h, conf, cls = det
+                print(f"      Shiny {i+1}: conf={conf:.6f}, box=[{x:.1f}, {y:.1f}, {w:.1f}, {h:.1f}]")
+        else:
+            print(f"   ❌ NO SHINY ICONS (class 50) detected")
+
+        # Show top detections
+        if len(valid_detections) > 0:
+            # Sort by confidence
+            sorted_indices = np.argsort(confidences)[::-1]
+            top_detections = valid_detections[sorted_indices[:10]]
+
+            print(f"   🎯 Top 10 detections:")
+            for i, det in enumerate(top_detections):
+                x, y, w, h, conf, cls = det
+                print(f"      {i+1}. Class {int(cls)}: conf={conf:.4f}, box=[{x:.1f}, {y:.1f}, {w:.1f}, {h:.1f}]")
+
+        return valid_detections
+
+    else:
+        print(f"   ⚠️ Raw format detected ({num_values} values) - not processed")
+        return None
+
+def test_multiple_models(image_path):
+    """Test multiple ONNX models on the same image"""
+    print("="*80)
+    print("🔍 STATIC IMAGE ONNX TESTING")
+    print("="*80)
+
+    models_to_test = [
+        "app/src/main/assets/best.onnx",
+        "raw_models/exports/best_no_nms.onnx",
+        "raw_models/exports/best_nms_relaxed.onnx",
+        "raw_models/exports/best_nms_very_relaxed.onnx"
+    ]
+
+    results = {}
+
+    for model_path in models_to_test:
+        if Path(model_path).exists():
+            print(f"\n{'='*60}")
+            detections = test_onnx_static(model_path, image_path)
+            results[model_path] = detections
+        else:
+            print(f"\n⚠️ Model not found: {model_path}")
+            results[model_path] = None
+
+    # Summary comparison
+    print(f"\n{'='*80}")
+    print("📊 COMPARISON SUMMARY")
+    print("="*80)
+
+    for model_path, detections in results.items():
+        model_name = Path(model_path).name
+
+        if detections is None:
+            print(f"❌ {model_name}: Failed or not found")
+            continue
+
+        if len(detections) == 0:
+            print(f"🔵 {model_name}: No detections")
+            continue
+
+        # Count shiny icons
+        classes = detections[:, 5].astype(int) if len(detections) > 0 else []
+        shiny_count = np.sum(classes == 50) if len(classes) > 0 else 0
+        total_count = len(detections)
+
+        print(f"✅ {model_name}: {total_count} total, {shiny_count} shiny icons")
+
+    print("="*80)
+
+if __name__ == "__main__":
+    # Look for test images
+    test_image_candidates = [
+        "test_images/shiny_test.jpg",
+        "test_images/test.jpg",
+        "screenshots/shiny.jpg",
+        "screenshots/test.png"
+    ]
+
+    test_image_found = None
+    for candidate in test_image_candidates:
+        if Path(candidate).exists():
+            test_image_found = candidate
+            break
+
+    if test_image_found:
+        print(f"🎯 Using test image: {test_image_found}")
+        test_multiple_models(test_image_found)
+    else:
+        print("❌ No test image found. Available options:")
+        for candidate in test_image_candidates:
+            print(f"   {candidate}")
+        print("\nPlease provide a test image with a shiny icon at one of these paths.")
+        print("You can use the debug_model_comparison.py script to capture a screenshot.")
\ No newline at end of file