diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt
index f37e6f7..d956c8d 100644
--- a/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt
+++ b/app/src/main/java/com/quillstudios/pokegoalshelper/ScreenCaptureService.kt
@@ -26,10 +26,12 @@ import com.quillstudios.pokegoalshelper.ui.FloatingOrbUI
 import org.opencv.android.Utils
 import org.opencv.core.*
 import org.opencv.imgproc.Imgproc
+import org.opencv.imgcodecs.Imgcodecs
 import com.google.mlkit.vision.common.InputImage
 import com.google.mlkit.vision.text.TextRecognition
 import com.google.mlkit.vision.text.latin.TextRecognizerOptions
 import java.util.concurrent.CountDownLatch
+import java.io.File
 import java.util.concurrent.TimeUnit
 import java.util.concurrent.Executors
 import java.util.concurrent.ThreadPoolExecutor
@@ -344,6 +346,9 @@ class ScreenCaptureService : Service() {
         val rowStride = planes[0].rowStride
         val rowPadding = rowStride - pixelStride * screenWidth

+        Log.d(TAG, "🖼️ CAPTURE DEBUG: pixelStride=$pixelStride, rowStride=$rowStride, rowPadding=$rowPadding")
+        Log.d(TAG, "🖼️ CAPTURE DEBUG: screenSize=${screenWidth}x${screenHeight}, expected bitmap=${screenWidth + rowPadding / pixelStride}x${screenHeight}")
+
         // Create bitmap from image
         val bitmap = Bitmap.createBitmap(
             screenWidth + rowPadding / pixelStride,
@@ -352,16 +357,40 @@
         )
         bitmap.copyPixelsFromBuffer(buffer)

+        Log.d(TAG, "🖼️ CAPTURE DEBUG: created bitmap=${bitmap.width}x${bitmap.height}")
+
         // Convert to cropped bitmap if needed
         val croppedBitmap = if (rowPadding == 0) {
+            Log.d(TAG, "🖼️ CAPTURE DEBUG: No padding, using original bitmap")
             bitmap
         } else {
+            Log.d(TAG, "🖼️ CAPTURE DEBUG: Cropping bitmap from ${bitmap.width}x${bitmap.height} to ${screenWidth}x${screenHeight}")
            Bitmap.createBitmap(bitmap, 0, 0, screenWidth, screenHeight)
         }

+        Log.d(TAG, "🖼️ CAPTURE DEBUG: final bitmap=${croppedBitmap.width}x${croppedBitmap.height}")
+
         // Convert to OpenCV Mat for analysis
         val mat = Mat()
         Utils.bitmapToMat(croppedBitmap, mat)
+
+        // DEBUG: Check color conversion
+        Log.d(TAG, "🎨 COLOR DEBUG: Mat type=${mat.type()}, channels=${mat.channels()}")
+        Log.d(TAG, "🎨 COLOR DEBUG: OpenCV expects BGR, Android Bitmap is ARGB")
+
+        // Sample a center pixel to check color values
+        if (mat.rows() > 0 && mat.cols() > 0) {
+            val centerY = mat.rows() / 2
+            val centerX = mat.cols() / 2
+            val pixel = mat.get(centerY, centerX)
+            if (pixel != null && pixel.size >= 3) {
+                val b = pixel[0].toInt()
+                val g = pixel[1].toInt()
+                val r = pixel[2].toInt()
+                Log.d(TAG, "🎨 COLOR DEBUG: Center pixel (${centerX},${centerY}) BGR=($b,$g,$r) -> RGB=(${r},${g},${b})")
+                Log.d(TAG, "🎨 COLOR DEBUG: Center pixel hex = #${String.format("%02x%02x%02x", r, g, b)}")
+            }
+        }

         // Run YOLO analysis
         analyzePokemonScreen(mat)
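The two CAPTURE DEBUG lines in the hunk above exist because ImageReader rows are usually padded: rowStride can be larger than pixelStride * width, so the bitmap has to be allocated rowPadding / pixelStride pixels wider than the screen and cropped afterwards. A minimal illustration of that arithmetic with hypothetical values (the real numbers come from the log lines themselves):

# Hypothetical plane values -- read the real ones from the CAPTURE DEBUG log lines.
pixel_stride = 4            # bytes per pixel for RGBA_8888
row_stride = 4352           # bytes per row reported by the ImageReader plane
screen_width = 1080

row_padding = row_stride - pixel_stride * screen_width     # 32 padding bytes per row
bitmap_width = screen_width + row_padding // pixel_stride  # 1088 px wide bitmap, cropped back to 1080
print(row_padding, bitmap_width)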
@@ -1085,6 +1114,9 @@ class ScreenCaptureService : Service() {
         val rowStride = planes[0].rowStride
         val rowPadding = rowStride - pixelStride * screenWidth

+        Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: pixelStride=$pixelStride, rowStride=$rowStride, rowPadding=$rowPadding")
+        Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: screenSize=${screenWidth}x${screenHeight}")
+
         // Create bitmap from image
         val bitmap = Bitmap.createBitmap(
             screenWidth + rowPadding / pixelStride,
@@ -1093,19 +1125,71 @@
         )
         bitmap.copyPixelsFromBuffer(buffer)

+        Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: created bitmap=${bitmap.width}x${bitmap.height}")
+
+        // Crop bitmap to remove padding if needed
+        val croppedBitmap = if (rowPadding == 0) {
+            Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: No padding, using original bitmap")
+            bitmap
+        } else {
+            Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: Cropping bitmap from ${bitmap.width}x${bitmap.height} to ${screenWidth}x${screenHeight}")
+            val cropped = Bitmap.createBitmap(bitmap, 0, 0, screenWidth, screenHeight)
+            bitmap.recycle() // Clean up original
+            cropped
+        }
+
+        Log.d(TAG, "🖼️ MANUAL CAPTURE DEBUG: final bitmap=${croppedBitmap.width}x${croppedBitmap.height}")
+
         // Convert bitmap to Mat
         val mat = Mat()
-        Utils.bitmapToMat(bitmap, mat)
+        Utils.bitmapToMat(croppedBitmap, mat)

-        // Convert from RGBA to RGB (YOLO expects RGB)
-        val rgbMat = Mat()
-        Imgproc.cvtColor(mat, rgbMat, Imgproc.COLOR_RGBA2RGB)
+        Log.d(TAG, "🎨 MANUAL COLOR DEBUG: Mat type=${mat.type()}, channels=${mat.channels()}, size=${mat.cols()}x${mat.rows()}")
+
+        // Sample specific pixels to check color values
+        if (mat.rows() > 0 && mat.cols() > 0) {
+            // Sample center pixel
+            val centerY = mat.rows() / 2
+            val centerX = mat.cols() / 2
+            val centerPixel = mat.get(centerY, centerX)
+            if (centerPixel != null && centerPixel.size >= 3) {
+                val b = centerPixel[0].toInt()
+                val g = centerPixel[1].toInt()
+                val r = centerPixel[2].toInt()
+                val a = if (centerPixel.size >= 4) centerPixel[3].toInt() else 255
+                Log.d(TAG, "🎨 MANUAL COLOR DEBUG: Center pixel (${centerX},${centerY}) BGRA=($b,$g,$r,$a) -> RGBA=($r,$g,$b,$a)")
+                Log.d(TAG, "🎨 MANUAL COLOR DEBUG: Center pixel hex = #${String.format("%02x%02x%02x", r, g, b)}")
+            }
+
+            // Sample shiny icon pixel at x=155, y=1087
+            val shinyX = 155
+            val shinyY = 1087
+            if (shinyX < mat.cols() && shinyY < mat.rows()) {
+                val shinyPixel = mat.get(shinyY, shinyX)
+                if (shinyPixel != null && shinyPixel.size >= 3) {
+                    val b = shinyPixel[0].toInt()
+                    val g = shinyPixel[1].toInt()
+                    val r = shinyPixel[2].toInt()
+                    val a = if (shinyPixel.size >= 4) shinyPixel[3].toInt() else 255
+                    Log.d(TAG, "✨ SHINY PIXEL DEBUG: Shiny icon pixel (${shinyX},${shinyY}) BGRA=($b,$g,$r,$a) -> RGBA=($r,$g,$b,$a)")
+                    Log.d(TAG, "✨ SHINY PIXEL DEBUG: Shiny icon pixel hex = #${String.format("%02x%02x%02x", r, g, b)}")
+                }
+            } else {
+                Log.w(TAG, "⚠️ SHINY PIXEL DEBUG: Coordinates (${shinyX},${shinyY}) out of bounds for ${mat.cols()}x${mat.rows()} image")
+            }
+        }
+
+        // Convert from RGBA to BGR (OpenCV format, then YOLO preprocessing will handle RGB conversion)
+        val bgrMat = Mat()
+        Imgproc.cvtColor(mat, bgrMat, Imgproc.COLOR_RGBA2BGR)
+
+        Log.d(TAG, "🎨 COLOR FIX: Converted RGBA to BGR format for OpenCV compatibility")

         // Clean up
         mat.release()
-        bitmap.recycle()
+        croppedBitmap.recycle()

-        rgbMat
+        bgrMat
     } catch (e: Exception) {
         Log.e(TAG, "❌ Error converting image to Mat", e)
         null
@@ -1121,6 +1205,11 @@ class ScreenCaptureService : Service() {
         val mat = convertImageToMat(image)

         if (mat != null) {
+            // DEBUG: Save captured image for comparison with working test image
+            saveDebugImage(mat, "captured_screen_${System.currentTimeMillis()}")
+
+            // Also test this captured image through ONNX pipeline directly
+            testCapturedImageThroughONNX(mat)

             // Use controller to process detection (this will notify UI via callbacks)
             val detections = detectionController.processDetection(mat)
@@ -1149,6 +1238,71 @@
         }
     }

+    /**
+     * Save debug image to external storage for comparison
+     */
+    private fun saveDebugImage(mat: Mat, filename: String) {
+        try {
+            val debugDir = File(getExternalFilesDir(null), "debug_images")
+            if (!debugDir.exists()) {
+                debugDir.mkdirs()
+            }
+
+            val imageFile = File(debugDir, "$filename.jpg")
+            val success = Imgcodecs.imwrite(imageFile.absolutePath, mat)
+
+            if (success) {
+                Log.d(TAG, "🖼️ DEBUG: Saved captured image to ${imageFile.absolutePath}")
+                Log.d(TAG, "🖼️ DEBUG: Image properties - Size: ${mat.cols()}x${mat.rows()}, Type: ${mat.type()}, Channels: ${mat.channels()}")
+            } else {
+                Log.e(TAG, "❌ DEBUG: Failed to save image")
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "❌ DEBUG: Error saving image", e)
+        }
+    }
+
+    /**
+     * Test captured image directly through ONNX pipeline to isolate issues
+     */
+    private fun testCapturedImageThroughONNX(mat: Mat) {
+        try {
+            Log.d(TAG, "🧪 DEBUG: Testing captured image through ONNX pipeline")
+            Log.d(TAG, "🧪 DEBUG: Input image - Size: ${mat.cols()}x${mat.rows()}, Type: ${mat.type()}")
+
+            // Test with existing initialized ONNX detector
+            val currentDetector = yoloDetector
+            val detections = if (currentDetector is YOLOOnnxDetector) {
+                Log.d(TAG, "🧪 DEBUG: Using existing ONNX detector")
+                currentDetector.detect(mat)
+            } else {
+                Log.d(TAG, "🧪 DEBUG: Creating new ONNX detector for test")
+                val testDetector = YOLOOnnxDetector(this)
+                testDetector.detect(mat)
+            }
+
+            Log.d(TAG, "🧪 DEBUG: Direct ONNX test found ${detections.size} detections")
+
+            // Check specifically for shiny icons (class 50)
+            val shinyDetections = detections.filter { detection -> detection.classId == 50 }
+            if (shinyDetections.isNotEmpty()) {
+                Log.d(TAG, "✨ DEBUG: FOUND ${shinyDetections.size} SHINY ICONS in captured image!")
+                shinyDetections.forEach { detection ->
+                    Log.d(TAG, "✨ DEBUG: Shiny detection - conf: ${detection.confidence}, box: [${detection.boundingBox.x}, ${detection.boundingBox.y}, ${detection.boundingBox.width}, ${detection.boundingBox.height}]")
+                }
+            } else {
+                Log.d(TAG, "❌ DEBUG: NO SHINY ICONS found in captured image")
+            }
+
+            // Log all detections for comparison
+            val classGroups = detections.groupBy { detection -> detection.classId }
+            Log.d(TAG, "🧪 DEBUG: Detection classes found: ${classGroups.keys.sorted()}")
+
+        } catch (e: Exception) {
+            Log.e(TAG, "❌ DEBUG: Error testing captured image", e)
+        }
+    }
+
     override fun onDestroy() {
         super.onDestroy()
         hideDetectionOverlay()
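Frames written by saveDebugImage() land in the app's external files directory under debug_images/. Once a file has been copied to a workstation, a quick offline sanity check is to read the same (x=155, y=1087) pixel the service samples and compare it against the on-device SHINY PIXEL DEBUG log. A minimal sketch, with a hypothetical local filename:

import cv2

# Hypothetical local copy of a frame produced by saveDebugImage() on the device.
img = cv2.imread("debug_images/captured_screen_1700000000000.jpg")

if img is not None:
    h, w = img.shape[:2]
    print(f"saved frame is {w}x{h}")
    b, g, r = img[1087, 155]  # cv2.imread returns BGR; same pixel the service logs
    print(f"shiny-icon pixel BGR=({b},{g},{r}) hex=#{r:02x}{g:02x}{b:02x}")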
diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt
index ce7022c..817e0eb 100644
--- a/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt
+++ b/app/src/main/java/com/quillstudios/pokegoalshelper/YOLOOnnxDetector.kt
@@ -414,23 +414,38 @@ class YOLOOnnxDetector(private val context: Context) {
         // Log actual tensor dimensions to understand the model output format
         Log.w(TAG, "🔬 [TENSOR DIMS] OutputTensor shape: ${outputTensor.size} x ${outputTensor[0].size} x ${outputTensor[0][0].size}")

-        // For NMS-enabled model (1 x 300 x 6), check what classes are being detected
-        if (outputTensor[0].size == 300 && outputTensor[0][0].size == 6) {
+        // Handle different model output formats
+        val dim1 = outputTensor[0].size
+        val dim2 = outputTensor[0][0].size
+
+        if ((dim1 == 300 || dim1 == 500 || dim1 == 1000) && dim2 == 6) {
+            // NMS-enabled model (1 x N x 6) format where N = max_det
             var shinyFound = false
             val detectedClasses = mutableSetOf<Int>()
-            var lowConfShinyCount = 0
-            for (i in 0 until 300) {
+            var class50NonZeroCount = 0
+            var totalClass50Count = 0
+            var class29Count = 0
+            for (i in 0 until dim1) {
                 val detection = outputTensor[0][i]
                 val confidence = detection[4]
                 val classId = detection[5].toInt()

-                // Check for ANY shiny detections, even very low confidence
+                // Count ALL class 50 detections, regardless of confidence
                 if (classId == 50) {
-                    lowConfShinyCount++
-                    if (confidence > 0.01f) { // Much lower threshold for shiny
+                    totalClass50Count++
+                    if (confidence > 0.000001f) { // Any non-zero confidence
+                        class50NonZeroCount++
+                        Log.w(TAG, "🔍 [CLASS 50] Index: $i, Confidence: %.6f, Coords: [%.1f,%.1f,%.1f,%.1f]".format(confidence, detection[0], detection[1], detection[2], detection[3]))
+                    }
+                }
+
+                // Also check class 29 for comparison
+                if (classId == 29) {
+                    class29Count++
+                    if (confidence > 0.01f) { // Much lower threshold for known working class
                         shinyFound = true
-                        Log.w(TAG, "✨ [FOUND SHINY] Index: $i, Confidence: %.4f, ClassID: $classId, Coords: [%.1f,%.1f,%.1f,%.1f]".format(confidence, detection[0], detection[1], detection[2], detection[3]))
+                        Log.w(TAG, "✨ [CLASS 29] Index: $i, Confidence: %.4f, Coords: [%.1f,%.1f,%.1f,%.1f]".format(confidence, detection[0], detection[1], detection[2], detection[3]))
                     }
                 }

@@ -439,16 +454,70 @@
                 }
             }

-            if (lowConfShinyCount > 0) {
-                Log.w(TAG, "🔍 [LOW CONF SHINY] Found $lowConfShinyCount class-50 predictions (any confidence)")
-            }
+            Log.w(TAG, "🔍 [SUMMARY] Class 29: $class29Count total, Class 50: $totalClass50Count total ($class50NonZeroCount non-zero)")

             Log.w(TAG, "🔬 [NMS CLASSES] Detected classes: ${detectedClasses.sorted()}")

             if (!shinyFound) {
                 Log.w(TAG, "❌ [NO SHINY] Shiny icon (class 50) not found in NMS output")
             }
+        } else if (dim1 == 100 && dim2 == 8400) {
+            // Raw no-NMS model (1 x 100 x 8400) format: [x,y,w,h,objectness,class0,class1,...,class94] × 8400 detections
+            Log.w(TAG, "🔬 [RAW MODEL] Detected raw no-NMS format: checking for shiny icons in 8400 detections")
+
+            var shinyFound = false
+            var maxRawShiny = 0f
+            var maxSigmoidShiny = 0f
+            var shinyDetectionCount = 0
+            val highConfShinyBoxes = mutableListOf<String>()
+            val topShinyDetections = mutableListOf<Pair<Float, Int>>()
+
+            // Class 50 data starts at index 55 (after x,y,w,h,objectness + 50 classes)
+            val shinyClassRow = 4 + 1 + 50 // row 55
+
+            for (detectionIdx in 0 until 8400) {
+                val rawShinyValue = outputTensor[0][shinyClassRow][detectionIdx]
+
+                // Try sigmoid activation to convert logits to probabilities
+                val shinyConfidence = 1.0f / (1.0f + kotlin.math.exp(-rawShinyValue))
+
+                if (rawShinyValue > maxRawShiny) {
+                    maxRawShiny = rawShinyValue
+                }
+                if (shinyConfidence > maxSigmoidShiny) {
+                    maxSigmoidShiny = shinyConfidence
+                }
+
+                // Track top detections for debugging
+                topShinyDetections.add(Pair(shinyConfidence, detectionIdx))
+
+                if (shinyConfidence > 0.1f) {
+                    shinyDetectionCount++
+                    shinyFound = true
+
+                    // Get bounding box coordinates
+                    val x = outputTensor[0][0][detectionIdx]
+                    val y = outputTensor[0][1][detectionIdx]
+                    val w = outputTensor[0][2][detectionIdx]
+                    val h = outputTensor[0][3][detectionIdx]
+
+                    highConfShinyBoxes.add("Detection $detectionIdx: raw=%.4f, sigmoid=%.4f, box=[%.1f,%.1f,%.1f,%.1f]".format(rawShinyValue, shinyConfidence, x, y, w, h))
+                }
+            }
+
+            // Show top shiny detections for debugging
+            val topShiny = topShinyDetections.sortedByDescending { it.first }.take(3)
+
+            Log.w(TAG, "🔬 [RAW SHINY] Max raw: %.4f, Max sigmoid: %.4f, Count >0.1: $shinyDetectionCount".format(maxRawShiny, maxSigmoidShiny))
+            Log.w(TAG, "🔬 [TOP SHINY] Top 3 sigmoid values: ${topShiny.map { "%.4f@${it.second}".format(it.first) }}")
+
+            if (shinyFound) {
+                Log.w(TAG, "✨ [RAW SHINY FOUND] High-confidence shiny detections:")
+                highConfShinyBoxes.take(5).forEach { Log.w(TAG, " $it") }
+            } else {
+                Log.w(TAG, "❌ [RAW NO SHINY] No high-confidence shiny detections in raw 8400 predictions")
+            }
         } else {
-            Log.w(TAG, "🔬 [SIZE CHECK] Expected NMS format (1x300x6), got: ${outputTensor.size}x${outputTensor[0].size}x${outputTensor[0][0].size}")
+            Log.w(TAG, "🔬 [UNKNOWN FORMAT] Unexpected tensor shape: ${outputTensor.size}x${dim1}x${dim2}")
         }
     }
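The raw-output branch above assumes rows 0-3 of the (1 x 100 x 8400) head are box coordinates and that class 50 lives in row 55, and it applies a sigmoid in case the export emits logits. The same probe is easy to reproduce against the exported ONNX file with a few lines of numpy; this is a sketch only (plain resize instead of letterboxing, and whether an objectness row exists depends on how the model was exported):

import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("raw_models/exports/best_no_nms.onnx")
img = cv2.cvtColor(cv2.imread("test_images/shiny_test.jpg"), cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (640, 640)).astype(np.float32) / 255.0   # coarse probe, no letterbox
blob = np.expand_dims(img.transpose(2, 0, 1), 0)

out = session.run(None, {session.get_inputs()[0].name: blob})[0]  # expected shape (1, 100, 8400)
shiny_row = out[0][4 + 1 + 50]                    # same row-55 assumption as the Kotlin branch
shiny_sigmoid = 1.0 / (1.0 + np.exp(-shiny_row))  # mirrors the on-device sigmoid probe
print("max raw:", shiny_row.max(), "max sigmoid:", shiny_sigmoid.max())
print("top columns:", np.argsort(shiny_sigmoid)[::-1][:3])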
diff --git a/debug_model_comparison.py b/debug_model_comparison.py
new file mode 100644
index 0000000..6bbcffe
--- /dev/null
+++ b/debug_model_comparison.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+Compare .pt model predictions with ONNX model outputs on the same static test image
+Provides detailed debug output to identify differences in preprocessing and inference
+"""
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+import onnxruntime as ort
+import torch
+import os
+from pathlib import Path
+
+# Force CPU-only execution to avoid CUDA compatibility issues
+os.environ['CUDA_VISIBLE_DEVICES'] = ''
+torch.cuda.is_available = lambda: False
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    """Letterbox preprocessing - exact copy of YOLO preprocessing"""
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return im, ratio, (dw, dh)
+
+def preprocess_image(image_path, target_size=(640, 640)):
+    """Preprocess image for ONNX model - matches Android preprocessing"""
+    print(f"📸 Loading image: {image_path}")
+
+    # Load image
+    img = cv2.imread(str(image_path))
+    if img is None:
+        raise ValueError(f"Could not load image: {image_path}")
+
+    print(f" Original size: {img.shape}")
+
+    # Apply letterbox (same as YOLO preprocessing)
+    img_processed, ratio, pad = letterbox(img, target_size)
+    print(f" Letterboxed size: {img_processed.shape}")
+    print(f" Scale ratio: {ratio}")
+    print(f" Padding (dw, dh): {pad}")
+
+    # Convert BGR to RGB
+    img_rgb = cv2.cvtColor(img_processed, cv2.COLOR_BGR2RGB)
+
+    # Normalize to [0, 1] and convert to CHW format
+    img_normalized = img_rgb.astype(np.float32) / 255.0
+    img_chw = np.transpose(img_normalized, (2, 0, 1))
+    img_batch = np.expand_dims(img_chw, axis=0)
+
+    print(f" Final tensor shape: {img_batch.shape}")
+    print(f" Value range: [{img_batch.min():.3f}, {img_batch.max():.3f}]")
+
+    return img_batch, img, ratio, pad
+
+def run_pt_model(model_path, image_path):
+    """Run .pt model prediction with full debug output"""
+    print("\n🔥 Running .pt model prediction:")
+    print(f" Model: {model_path}")
+
+    # Load model
+    model = YOLO(model_path)
+
+    # Run prediction with verbose output
+    results = model.predict(
+        source=str(image_path),
+        conf=0.01,  # Very low confidence to catch everything
+        iou=0.5,
+        max_det=1000,
+        verbose=True,
+        save=False
+    )
+
+    result = results[0]
+    print(f" Found {len(result.boxes)} detections")
+
+    # Extract raw data
+    if len(result.boxes) > 0:
+        boxes = result.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
+        confidences = result.boxes.conf.cpu().numpy()
+        classes = result.boxes.cls.cpu().numpy().astype(int)
+
+        print(f"\n📊 .pt Model Results Summary:")
+        print(f" Total detections: {len(boxes)}")
+
+        # Group by class
+        class_counts = {}
+        for cls_id in classes:
+            class_counts[cls_id] = class_counts.get(cls_id, 0) + 1
+
+        print(f" Classes found: {sorted(class_counts.keys())}")
+
+        # Focus on shiny icon (class 50)
+        shiny_detections = [(i, conf) for i, (cls_id, conf) in enumerate(zip(classes, confidences)) if cls_id == 50]
+        if shiny_detections:
+            print(f"\n✨ SHINY ICON DETECTIONS (Class 50):")
+            for i, conf in shiny_detections:
+                box = boxes[i]
+                print(f" Detection {i}: conf={conf:.6f}, box=[{box[0]:.1f},{box[1]:.1f},{box[2]:.1f},{box[3]:.1f}]")
+        else:
+            print(f"\n❌ NO SHINY ICON DETECTIONS (Class 50)")
+
+        # Show all detections with confidence > 0.1
+        high_conf_detections = [(i, cls_id, conf) for i, (cls_id, conf) in enumerate(zip(classes, confidences)) if conf > 0.1]
+        if high_conf_detections:
+            print(f"\n🎯 High confidence detections (>0.1):")
+            for i, cls_id, conf in high_conf_detections[:10]:  # Show top 10
+                box = boxes[i]
+                print(f" Class {cls_id}: conf={conf:.6f}, box=[{box[0]:.1f},{box[1]:.1f},{box[2]:.1f},{box[3]:.1f}]")
+
+        return boxes, confidences, classes
+    else:
+        print(f"\n❌ NO DETECTIONS FOUND")
+        return None, None, None
+
+def run_onnx_model(model_path, preprocessed_img):
+    """Run ONNX model inference with full debug output"""
+    print(f"\n🔧 Running ONNX model inference:")
+    print(f" Model: {model_path}")
+
+    # Load ONNX model
+    session = ort.InferenceSession(str(model_path))
+
+    # Get model info
+    input_name = session.get_inputs()[0].name
+    output_names = [output.name for output in session.get_outputs()]
+
+    print(f" Input name: {input_name}")
+    print(f" Output names: {output_names}")
+    print(f" Input shape: {preprocessed_img.shape}")
+
+    # Run inference
+    outputs = session.run(output_names, {input_name: preprocessed_img})
+
+    print(f" Number of outputs: {len(outputs)}")
+    for i, output in enumerate(outputs):
+        print(f" Output {i} shape: {output.shape}")
+
+    # Process main output (should be detections)
+    detections = outputs[0]  # Usually the first output contains detections
+
+    if len(detections.shape) == 3:
+        batch_size, num_detections, num_values = detections.shape
+        print(f" Detections tensor: [{batch_size}, {num_detections}, {num_values}]")
+
+        # Extract detections from batch
+        detection_data = detections[0]  # Remove batch dimension
+
+        if num_values == 6:  # NMS format: [x, y, w, h, conf, class]
+            print(f" Format: NMS output (x, y, w, h, conf, class)")
+
+            # Count valid detections (non-zero confidence)
+            valid_mask = detection_data[:, 4] > 0.000001  # conf > 0
+            valid_detections = detection_data[valid_mask]
+
print(f" Valid detections: {len(valid_detections)} / {num_detections}") + + if len(valid_detections) > 0: + confidences = valid_detections[:, 4] + classes = valid_detections[:, 5].astype(int) + + # Group by class + class_counts = {} + for cls_id in classes: + class_counts[cls_id] = class_counts.get(cls_id, 0) + 1 + + print(f" Classes found: {sorted(class_counts.keys())}") + + # Focus on shiny icon (class 50) + shiny_mask = classes == 50 + shiny_detections = valid_detections[shiny_mask] + + if len(shiny_detections) > 0: + print(f"\nโœจ SHINY ICON DETECTIONS (Class 50): {len(shiny_detections)}") + for i, det in enumerate(shiny_detections): + print(f" Detection {i}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]") + else: + print(f"\nโŒ NO SHINY ICON DETECTIONS (Class 50)") + + # Show high confidence detections + high_conf_mask = confidences > 0.1 + high_conf_detections = valid_detections[high_conf_mask] + + if len(high_conf_detections) > 0: + print(f"\n๐ŸŽฏ High confidence detections (>0.1): {len(high_conf_detections)}") + for i, det in enumerate(high_conf_detections[:10]): # Show top 10 + print(f" Class {int(det[5])}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]") + + return valid_detections + + elif num_values > 80: # Raw format: [x, y, w, h, obj, class0, class1, ...] + print(f" Format: Raw output ({num_values-5} classes)") + + # This would need more complex processing for raw outputs + print(f" โš ๏ธ Raw format detected - would need objectness * class confidence processing") + return None + + else: + print(f" โš ๏ธ Unexpected output shape: {detections.shape}") + return None + +def compare_models(pt_model_path, onnx_model_path, test_image_path): + """Compare .pt and ONNX model outputs on the same image""" + print("="*80) + print("๐Ÿ” MODEL COMPARISON DEBUG SESSION") + print("="*80) + + # Check if files exist + for path, name in [(pt_model_path, ".pt model"), (onnx_model_path, "ONNX model"), (test_image_path, "test image")]: + if not Path(path).exists(): + print(f"โŒ {name} not found: {path}") + return + + # Preprocess image for ONNX + try: + preprocessed_img, original_img, ratio, pad = preprocess_image(test_image_path) + except Exception as e: + print(f"โŒ Failed to preprocess image: {e}") + return + + # Run .pt model + try: + pt_boxes, pt_confidences, pt_classes = run_pt_model(pt_model_path, test_image_path) + except Exception as e: + print(f"โŒ Failed to run .pt model: {e}") + pt_boxes, pt_confidences, pt_classes = None, None, None + + # Run ONNX model + try: + onnx_detections = run_onnx_model(onnx_model_path, preprocessed_img) + except Exception as e: + print(f"โŒ Failed to run ONNX model: {e}") + onnx_detections = None + + # Compare results + print("\n" + "="*80) + print("๐Ÿ“Š COMPARISON SUMMARY") + print("="*80) + + # Count shiny detections + pt_shiny_count = 0 + onnx_shiny_count = 0 + + if pt_classes is not None: + pt_shiny_count = np.sum(pt_classes == 50) + + if onnx_detections is not None and len(onnx_detections) > 0: + if onnx_detections.shape[1] == 6: # NMS format + onnx_classes = onnx_detections[:, 5].astype(int) + onnx_shiny_count = np.sum(onnx_classes == 50) + + print(f"๐Ÿ”ฅ .pt Model Results:") + print(f" Total detections: {len(pt_boxes) if pt_boxes is not None else 0}") + print(f" Shiny icons (class 50): {pt_shiny_count}") + + print(f"\n๐Ÿ”ง ONNX Model Results:") + print(f" Total detections: {len(onnx_detections) if onnx_detections is not None else 0}") + print(f" Shiny icons (class 50): 
+
+    if pt_shiny_count > 0 and onnx_shiny_count == 0:
+        print(f"\n🚨 ISSUE CONFIRMED: .pt model finds {pt_shiny_count} shiny icons, ONNX finds 0")
+        print(f" This confirms the preprocessing/inference discrepancy")
+    elif pt_shiny_count == onnx_shiny_count and pt_shiny_count > 0:
+        print(f"\n✅ Both models find {pt_shiny_count} shiny icons - issue may be elsewhere")
+
+    print("\n" + "="*80)
+
+if __name__ == "__main__":
+    # Test with available models and image
+    pt_model = "raw_models/best.pt"
+
+    # Test multiple ONNX variants
+    onnx_models = [
+        "app/src/main/assets/best.onnx",
+        "raw_models/exports/best_no_nms.onnx",
+        "raw_models/exports/best_nms_relaxed.onnx",
+        "raw_models/exports/best_nms_very_relaxed.onnx"
+    ]
+
+    # You'll need to provide a test image with known shiny icon
+    test_image = "test_images/shiny_test.jpg"  # Replace with actual test image path
+
+    print("🔍 Looking for test images...")
+
+    # Try to find a suitable test image
+    test_image_candidates = [
+        "test_images/shiny_test.jpg",
+        "test_images/test.jpg",
+        "screenshots/shiny.jpg",
+        "screenshots/test.png"
+    ]
+
+    test_image_found = None
+    for candidate in test_image_candidates:
+        if Path(candidate).exists():
+            test_image_found = candidate
+            print(f" Found test image: {candidate}")
+            break
+
+    if not test_image_found:
+        print("❌ No test image found. Please provide a test image with shiny icon at one of these paths:")
+        for candidate in test_image_candidates:
+            print(f" {candidate}")
+        print("\nYou can capture a screenshot with shiny icon and save it as test_images/shiny_test.jpg")
+        exit(1)
+
+    # Run comparison for each ONNX model
+    for onnx_model in onnx_models:
+        if Path(onnx_model).exists():
+            print(f"\n🔄 Testing ONNX model: {onnx_model}")
+            compare_models(pt_model, onnx_model, test_image_found)
+            print("\n" + "="*120 + "\n")
+        else:
+            print(f"⚠️ ONNX model not found: {onnx_model}")
\ No newline at end of file
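Note that debug_model_comparison.py above letterboxes with the Ultralytics-style minimum-rectangle padding (auto=True), while test_static_onnx.py below pads to a full 640x640 square, so the two scripts do not necessarily hand the model identical tensors. A small parity check, assuming both scripts are importable from the repo root:

import cv2

from debug_model_comparison import letterbox
from test_static_onnx import letterbox_preprocess

img = cv2.imread("test_images/shiny_test.jpg")

boxed_auto, ratio, (dw, dh) = letterbox(img)               # minimum-rectangle padding (auto=True)
boxed_square, scale, (px, py) = letterbox_preprocess(img)  # full 640x640 square padding

print("ultralytics-style:", boxed_auto.shape, "ratio", ratio, "pad", (dw, dh))
print("square letterbox: ", boxed_square.shape, "scale", scale, "pad", (px, py))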
diff --git a/test_static_onnx.py b/test_static_onnx.py
new file mode 100644
index 0000000..76d67d9
--- /dev/null
+++ b/test_static_onnx.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""
+Test ONNX model against static images to isolate OpenCV capture issues
+This bypasses Android screen capture and tests pure ONNX inference
+"""
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+import os
+from pathlib import Path
+
+# Force CPU-only execution to avoid CUDA compatibility issues
+os.environ['CUDA_VISIBLE_DEVICES'] = ''
+
+def letterbox_preprocess(img, target_size=(640, 640)):
+    """Exact letterbox preprocessing matching Android implementation"""
+    h, w = img.shape[:2]
+
+    # Calculate scale factor
+    scale = min(target_size[0] / h, target_size[1] / w)
+
+    # Calculate new dimensions
+    new_w = int(w * scale)
+    new_h = int(h * scale)
+
+    # Resize image
+    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+
+    # Create padded image
+    padded = np.full((target_size[0], target_size[1], 3), 114, dtype=np.uint8)
+
+    # Calculate padding offsets
+    pad_x = (target_size[1] - new_w) // 2
+    pad_y = (target_size[0] - new_h) // 2
+
+    # Place resized image in center
+    padded[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized
+
+    return padded, scale, (pad_x, pad_y)
+
+def test_onnx_static(model_path, image_path, confidence_threshold=0.01):
+    """Test ONNX model on static image with detailed output"""
+    print(f"🔧 Testing ONNX model: {Path(model_path).name}")
+    print(f"📸 Image: {Path(image_path).name}")
+
+    # Load image
+    img = cv2.imread(str(image_path))
+    if img is None:
+        print(f"❌ Could not load image: {image_path}")
+        return None
+
+    print(f" Original image size: {img.shape}")
+
+    # Preprocess
+    processed_img, scale, padding = letterbox_preprocess(img)
+    print(f" Processed size: {processed_img.shape}")
+    print(f" Scale factor: {scale:.4f}")
+    print(f" Padding (x, y): {padding}")
+
+    # Convert for ONNX (RGB, normalize, CHW, batch)
+    img_rgb = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
+    img_norm = img_rgb.astype(np.float32) / 255.0
+    img_chw = np.transpose(img_norm, (2, 0, 1))
+    img_batch = np.expand_dims(img_chw, axis=0)
+
+    print(f" Final tensor: {img_batch.shape}, range: [{img_batch.min():.3f}, {img_batch.max():.3f}]")
+
+    # Load ONNX model
+    try:
+        session = ort.InferenceSession(str(model_path))
+        input_name = session.get_inputs()[0].name
+        print(f" Model loaded, input: {input_name}")
+    except Exception as e:
+        print(f"❌ Failed to load ONNX model: {e}")
+        return None
+
+    # Run inference
+    try:
+        outputs = session.run(None, {input_name: img_batch})
+        print(f" Inference successful, {len(outputs)} outputs")
+    except Exception as e:
+        print(f"❌ Inference failed: {e}")
+        return None
+
+    # Process outputs
+    if len(outputs) == 0:
+        print(f"❌ No outputs from model")
+        return None
+
+    detections = outputs[0]
+    print(f" Detection tensor shape: {detections.shape}")
+
+    if len(detections.shape) != 3:
+        print(f"❌ Unexpected detection shape: {detections.shape}")
+        return None
+
+    batch_size, num_detections, num_values = detections.shape
+    detection_data = detections[0]  # Remove batch dimension
+
+    if num_values == 6:  # NMS format
+        print(f" Format: NMS (x, y, w, h, conf, class)")
+
+        # Filter by confidence
+        valid_mask = detection_data[:, 4] > confidence_threshold
+        valid_detections = detection_data[valid_mask]
+
+        print(f" Valid detections (conf > {confidence_threshold}): {len(valid_detections)}")
+
+        if len(valid_detections) == 0:
+            print(f" ❌ No detections above confidence threshold")
+            return []
+
+        # Analyze by class
+        classes = valid_detections[:, 5].astype(int)
+        confidences = valid_detections[:, 4]
+
+        class_counts = {}
+        for cls_id in classes:
+            class_counts[cls_id] = class_counts.get(cls_id, 0) + 1
+
+        print(f" Classes detected: {sorted(class_counts.keys())}")
+
+        # Focus on shiny icons (class 50)
+        shiny_mask = classes == 50
+        shiny_detections = valid_detections[shiny_mask]
+
+        if len(shiny_detections) > 0:
+            print(f" ✨ SHINY ICONS FOUND: {len(shiny_detections)}")
+            for i, det in enumerate(shiny_detections):
+                x, y, w, h, conf, cls = det
+                print(f" Shiny {i+1}: conf={conf:.6f}, box=[{x:.1f}, {y:.1f}, {w:.1f}, {h:.1f}]")
+        else:
+            print(f" ❌ NO SHINY ICONS (class 50) detected")
+
+        # Show top detections
+        if len(valid_detections) > 0:
+            # Sort by confidence
+            sorted_indices = np.argsort(confidences)[::-1]
+            top_detections = valid_detections[sorted_indices[:10]]
+
+            print(f" 🎯 Top 10 detections:")
+            for i, det in enumerate(top_detections):
+                x, y, w, h, conf, cls = det
+                print(f" {i+1}. Class {int(cls)}: conf={conf:.4f}, box=[{x:.1f}, {y:.1f}, {w:.1f}, {h:.1f}]")
+
+        return valid_detections
+
+    else:
+        print(f" ⚠️ Raw format detected ({num_values} values) - not processed")
+        return None
+
+def test_multiple_models(image_path):
+    """Test multiple ONNX models on the same image"""
+    print("="*80)
+    print("🔍 STATIC IMAGE ONNX TESTING")
+    print("="*80)
+
+    models_to_test = [
+        "app/src/main/assets/best.onnx",
+        "raw_models/exports/best_no_nms.onnx",
+        "raw_models/exports/best_nms_relaxed.onnx",
+        "raw_models/exports/best_nms_very_relaxed.onnx"
+    ]
+
+    results = {}
+
+    for model_path in models_to_test:
+        if Path(model_path).exists():
+            print(f"\n{'='*60}")
+            detections = test_onnx_static(model_path, image_path)
+            results[model_path] = detections
+        else:
+            print(f"\n⚠️ Model not found: {model_path}")
+            results[model_path] = None
+
+    # Summary comparison
+    print(f"\n{'='*80}")
+    print("📊 COMPARISON SUMMARY")
+    print("="*80)
+
+    for model_path, detections in results.items():
+        model_name = Path(model_path).name
+
+        if detections is None:
+            print(f"❌ {model_name}: Failed or not found")
+            continue
+
+        if len(detections) == 0:
+            print(f"🔵 {model_name}: No detections")
+            continue
+
+        # Count shiny icons
+        classes = detections[:, 5].astype(int) if len(detections) > 0 else []
+        shiny_count = np.sum(classes == 50) if len(classes) > 0 else 0
+        total_count = len(detections)
+
+        print(f"✅ {model_name}: {total_count} total, {shiny_count} shiny icons")
+
+    print("="*80)
+
+if __name__ == "__main__":
+    # Look for test images
+    test_image_candidates = [
+        "test_images/shiny_test.jpg",
+        "test_images/test.jpg",
+        "screenshots/shiny.jpg",
+        "screenshots/test.png"
+    ]
+
+    test_image_found = None
+    for candidate in test_image_candidates:
+        if Path(candidate).exists():
+            test_image_found = candidate
+            break
+
+    if test_image_found:
+        print(f"🎯 Using test image: {test_image_found}")
+        test_multiple_models(test_image_found)
+    else:
+        print("❌ No test image found. Available options:")
+        for candidate in test_image_candidates:
+            print(f" {candidate}")
+        print("\nPlease provide a test image with shiny icon at one of these paths.")
+        print("You can use the debug_model_comparison.py script to capture a screenshot.")
\ No newline at end of file
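Both scripts currently stop at the "Raw format detected - not processed" branch. If the no-NMS export ever needs to be decoded end to end, the usual approach is to treat the (1, C, 8400) tensor as 4 box rows plus per-class score rows, take the best class per column, threshold, and run NMS. A rough sketch under the same class-layout assumption as the Kotlin raw branch (adjust the offset if the export really does include an objectness row); boxes stay in letterboxed 640x640 space and would still need mapping back through the stored scale and padding:

import cv2
import numpy as np

def decode_raw_output(raw, conf_thres=0.25, iou_thres=0.5):
    """raw: (1, C, 8400) array; rows 0-3 are cx, cy, w, h, remaining rows are class scores."""
    preds = raw[0]                       # (C, 8400)
    boxes_cxcywh = preds[:4].T           # (8400, 4)
    scores = preds[4:]                   # (num_classes, 8400)

    class_ids = scores.argmax(axis=0)
    confidences = scores.max(axis=0)
    keep = confidences > conf_thres

    boxes_xywh = boxes_cxcywh[keep].copy()
    boxes_xywh[:, 0] -= boxes_xywh[:, 2] / 2   # cx,cy -> top-left x,y for NMSBoxes
    boxes_xywh[:, 1] -= boxes_xywh[:, 3] / 2

    idxs = cv2.dnn.NMSBoxes(boxes_xywh.tolist(), confidences[keep].tolist(), conf_thres, iou_thres)
    idxs = np.array(idxs, dtype=int).flatten()
    return boxes_xywh[idxs], confidences[keep][idxs], class_ids[keep][idxs]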