PokeGoalsHelper/tools/debug_scripts/debug_model_comparison.py


								#!/usr/bin/env python3

								"""

								Compare .pt model predictions with ONNX model outputs on the same static test image

								Provides detailed debug output to identify differences in preprocessing and inference

								"""


								import cv2

								import numpy as np

								from ultralytics import YOLO

								import onnxruntime as ort

								import torch

								import os

								from pathlib import Path


								# Keep the whole comparison on the CPU to avoid CUDA compatibility issues:
								# expose no CUDA devices through the environment and make torch's
								# availability probe always answer "no".
								os.environ.update(CUDA_VISIBLE_DEVICES='')


								def _cuda_unavailable():
								    """Replacement for ``torch.cuda.is_available`` that always returns False."""
								    return False


								torch.cuda.is_available = _cuda_unavailable


								def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
								    """Resize an image to fit ``new_shape`` and pad the remainder with ``color``.

								    The file describes this as a copy of the YOLO letterbox preprocessing.

								    Args:
								        im: Image array whose first two dimensions are (height, width).
								        new_shape: Target (height, width); a single int means a square.
								        color: Border value passed to ``cv2.copyMakeBorder``.
								        auto: When true, the padding is reduced to its remainder modulo
								            ``stride``, so the result can be smaller than ``new_shape``.
								        scaleFill: Only consulted when ``auto`` is false. When true, the
								            image is stretched to ``new_shape`` and no padding is added.
								        scaleup: When false, the image is never enlarged (scale capped at 1.0).
								        stride: Modulus used for the ``auto`` padding.

								    Returns:
								        Tuple ``(image, ratio, (dw, dh))`` where ``ratio`` is the
								        (width, height) scale pair and ``dw``/``dh`` are the per-side
								        horizontal/vertical padding amounts.
								    """
								    src_shape = im.shape[:2]  # (height, width) of the input
								    if isinstance(new_shape, int):
								        new_shape = (new_shape, new_shape)

								    src_h, src_w = src_shape[0], src_shape[1]
								    dst_h, dst_w = new_shape[0], new_shape[1]

								    # One scale for both axes so the aspect ratio is preserved.
								    scale = min(dst_h / src_h, dst_w / src_w)
								    if not scaleup:
								        scale = min(scale, 1.0)

								    ratio = (scale, scale)
								    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))  # (width, height)
								    pad_w = dst_w - new_unpad[0]
								    pad_h = dst_h - new_unpad[1]

								    if auto:
								        # Minimum rectangle: keep only the padding needed to reach a stride multiple.
								        pad_w = np.mod(pad_w, stride)
								        pad_h = np.mod(pad_h, stride)
								    elif scaleFill:
								        # Stretch to the full target; each axis gets its own ratio.
								        pad_w = pad_h = 0.0
								        new_unpad = (dst_w, dst_h)
								        ratio = (dst_w / src_w, dst_h / src_h)

								    # Split the padding evenly between the two sides of each axis.
								    dw = pad_w / 2
								    dh = pad_h / 2

								    if src_shape[::-1] != new_unpad:
								        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

								    # The -0.1 / +0.1 nudges send the odd pixel of an odd padding to the
								    # bottom / right side.
								    top = int(round(dh - 0.1))
								    bottom = int(round(dh + 0.1))
								    left = int(round(dw - 0.1))
								    right = int(round(dw + 0.1))
								    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
								    return im, ratio, (dw, dh)


								def preprocess_image(image_path, target_size=(640, 640)):
								    """Load an image and build the input tensor for the ONNX model.

								    The image is letterboxed to exactly ``target_size``, converted from BGR
								    to RGB, scaled to ``[0, 1]`` as float32 and rearranged to
								    ``(1, channels, height, width)``.

								    Args:
								        image_path: Path of the image file to read with OpenCV.
								        target_size: ``(height, width)`` of the letterboxed image.

								    Returns:
								        Tuple ``(img_batch, img, ratio, pad)``: the batched tensor, the
								        image as loaded, and the scale ratio and per-side padding
								        reported by ``letterbox``.

								    Raises:
								        ValueError: If OpenCV cannot read ``image_path``.
								    """
								    print(f"📸 Loading image: {image_path}")

								    # Load image
								    img = cv2.imread(str(image_path))
								    if img is None:
								        raise ValueError(f"Could not load image: {image_path}")

								    print(f"   Original size: {img.shape}")

								    # auto=False pads all the way to target_size. The letterbox default
								    # (auto=True) only pads to a stride multiple, so a non-square image
								    # would come out smaller than target_size and not match a
								    # fixed-size model input.
								    img_processed, ratio, pad = letterbox(img, target_size, auto=False)
								    print(f"   Letterboxed size: {img_processed.shape}")
								    print(f"   Scale ratio: {ratio}")
								    print(f"   Padding (dw, dh): {pad}")

								    # Convert BGR to RGB
								    img_rgb = cv2.cvtColor(img_processed, cv2.COLOR_BGR2RGB)

								    # Normalize to [0, 1] and convert HWC -> CHW, then add the batch axis
								    img_normalized = img_rgb.astype(np.float32) / 255.0
								    img_chw = np.transpose(img_normalized, (2, 0, 1))
								    img_batch = np.expand_dims(img_chw, axis=0)

								    print(f"   Final tensor shape: {img_batch.shape}")
								    print(f"   Value range: [{img_batch.min():.3f}, {img_batch.max():.3f}]")

								    return img_batch, img, ratio, pad


								def run_pt_model(model_path, image_path):
								    """Run the .pt model on one image and print a detection report.

								    Args:
								        model_path: Path handed to ``YOLO`` to load the model.
								        image_path: Image to predict on (passed to ``predict`` as a string).

								    Returns:
								        Tuple ``(boxes, confidences, classes)`` of NumPy arrays taken from
								        the first result's ``xyxy``, ``conf`` and ``cls`` fields, or
								        ``(None, None, None)`` when nothing was detected.
								    """
								    print("\n🔥 Running .pt model prediction:")
								    print(f"   Model: {model_path}")

								    detector = YOLO(model_path)

								    # Very low confidence threshold so that weak detections are reported too.
								    prediction = detector.predict(
								        source=str(image_path),
								        conf=0.01,
								        iou=0.5,
								        max_det=1000,
								        verbose=True,
								        save=False,
								    )[0]
								    print(f"   Found {len(prediction.boxes)} detections")

								    if len(prediction.boxes) == 0:
								        print("\n❌ NO DETECTIONS FOUND")
								        return None, None, None

								    # Pull the raw arrays out of the result object.
								    boxes = prediction.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
								    confidences = prediction.boxes.conf.cpu().numpy()
								    classes = prediction.boxes.cls.cpu().numpy().astype(int)

								    print("\n📊 .pt Model Results Summary:")
								    print(f"   Total detections: {len(boxes)}")
								    print(f"   Classes found: {sorted(set(classes))}")

								    # Shiny icon is class 50.
								    shiny_indices = [idx for idx, cls_id in enumerate(classes) if cls_id == 50]
								    if shiny_indices:
								        print("\n✨ SHINY ICON DETECTIONS (Class 50):")
								        for idx in shiny_indices:
								            x1, y1, x2, y2 = boxes[idx]
								            print(f"      Detection {idx}: conf={confidences[idx]:.6f}, box=[{x1:.1f},{y1:.1f},{x2:.1f},{y2:.1f}]")
								    else:
								        print("\n❌ NO SHINY ICON DETECTIONS (Class 50)")

								    # Report at most the first ten detections whose confidence exceeds 0.1.
								    confident_indices = [idx for idx, score in enumerate(confidences) if score > 0.1][:10]
								    if confident_indices:
								        print("\n🎯 High confidence detections (>0.1):")
								        for idx in confident_indices:
								            x1, y1, x2, y2 = boxes[idx]
								            print(f"      Class {classes[idx]}: conf={confidences[idx]:.6f}, box=[{x1:.1f},{y1:.1f},{x2:.1f},{y2:.1f}]")

								    return boxes, confidences, classes


								def run_onnx_model(model_path, preprocessed_img):
								    """Run ONNX model inference on a prepared tensor and print a report.

								    Only the first model output is interpreted. A 3-D output with 6 values
								    per row is treated as post-NMS detections; any other 3-D output is
								    reported as raw (un-postprocessed) and not decoded.

								    Args:
								        model_path: Path of the ``.onnx`` file to load.
								        preprocessed_img: Tensor fed unchanged to the model's first input.

								    Returns:
								        The rows of the first output whose confidence (column 4) exceeds
								        1e-6 when that output has 6 values per row, otherwise ``None``
								        (no valid rows, raw output, or an output that is not 3-D).
								    """
								    print(f"\n🔧 Running ONNX model inference:")
								    print(f"   Model: {model_path}")

								    # Load ONNX model
								    session = ort.InferenceSession(str(model_path))

								    # Get model info
								    input_name = session.get_inputs()[0].name
								    output_names = [output.name for output in session.get_outputs()]

								    print(f"   Input name: {input_name}")
								    print(f"   Output names: {output_names}")
								    print(f"   Input shape: {preprocessed_img.shape}")

								    # Run inference
								    outputs = session.run(output_names, {input_name: preprocessed_img})

								    print(f"   Number of outputs: {len(outputs)}")
								    for i, output in enumerate(outputs):
								        print(f"   Output {i} shape: {output.shape}")

								    # Process main output (should be detections)
								    detections = outputs[0]
								    if len(detections.shape) != 3:
								        print(f"   ⚠️  Unexpected output shape: {detections.shape}")
								        return None

								    batch_size, num_detections, num_values = detections.shape
								    print(f"   Detections tensor: [{batch_size}, {num_detections}, {num_values}]")

								    if num_values != 6:
								        # Anything that is not a 6-value row is reported as raw output
								        # instead of being dropped silently. The smaller axis is taken to
								        # be the per-candidate attribute axis.
								        # NOTE(review): Ultralytics exports without NMS are commonly laid
								        # out as (batch, 4 + classes, candidates) - confirm against the
								        # actual export before decoding.
								        values_per_candidate, num_candidates = sorted((num_detections, num_values))
								        print(f"   Format: Raw output ({values_per_candidate} values per candidate, {num_candidates} candidates)")
								        print(f"   ⚠️  Raw format detected - would need objectness * class confidence processing")
								        return None

								    print(f"   Format: NMS output (x, y, w, h, conf, class)")

								    # Remove the batch dimension and keep rows with non-zero confidence.
								    detection_data = detections[0]
								    valid_detections = detection_data[detection_data[:, 4] > 0.000001]
								    print(f"   Valid detections: {len(valid_detections)} / {num_detections}")

								    if len(valid_detections) == 0:
								        # Same message and "nothing found" result as run_pt_model.
								        print(f"\n❌ NO DETECTIONS FOUND")
								        return None

								    confidences = valid_detections[:, 4]
								    classes = valid_detections[:, 5].astype(int)

								    # tolist() yields plain ints so the list prints cleanly on any NumPy version.
								    print(f"   Classes found: {np.unique(classes).tolist()}")

								    # Focus on shiny icon (class 50)
								    shiny_detections = valid_detections[classes == 50]
								    if len(shiny_detections) > 0:
								        print(f"\n✨ SHINY ICON DETECTIONS (Class 50): {len(shiny_detections)}")
								        for i, det in enumerate(shiny_detections):
								            print(f"      Detection {i}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]")
								    else:
								        print(f"\n❌ NO SHINY ICON DETECTIONS (Class 50)")

								    # Show the first ten detections with confidence above 0.1
								    high_conf_detections = valid_detections[confidences > 0.1]
								    if len(high_conf_detections) > 0:
								        print(f"\n🎯 High confidence detections (>0.1): {len(high_conf_detections)}")
								        for det in high_conf_detections[:10]:
								            print(f"      Class {int(det[5])}: conf={det[4]:.6f}, box=[{det[0]:.1f},{det[1]:.1f},{det[2]:.1f},{det[3]:.1f}]")

								    return valid_detections


								def compare_models(pt_model_path, onnx_model_path, test_image_path):
								    """Run the .pt and ONNX models on one image and print a comparison.

								    Returns early (with a message) when any of the three paths does not
								    exist or when preprocessing the image fails. A failure of either model
								    run is reported and treated as "no detections" for that model.

								    Args:
								        pt_model_path: Path of the .pt model.
								        onnx_model_path: Path of the ONNX model.
								        test_image_path: Path of the image both models are run on.

								    Returns:
								        None; all results are printed.
								    """
								    banner = "="*80
								    print(banner)
								    print("🔍 MODEL COMPARISON DEBUG SESSION")
								    print(banner)

								    # Bail out on the first missing input file.
								    required_files = (
								        (".pt model", pt_model_path),
								        ("ONNX model", onnx_model_path),
								        ("test image", test_image_path),
								    )
								    for label, location in required_files:
								        if Path(location).exists():
								            continue
								        print(f"❌ {label} not found: {location}")
								        return

								    # Build the ONNX input tensor; only the tensor itself is used below.
								    try:
								        preprocessed_img, _original_img, _ratio, _pad = preprocess_image(test_image_path)
								    except Exception as err:
								        print(f"❌ Failed to preprocess image: {err}")
								        return

								    # .pt model
								    try:
								        pt_boxes, _pt_confidences, pt_classes = run_pt_model(pt_model_path, test_image_path)
								    except Exception as err:
								        print(f"❌ Failed to run .pt model: {err}")
								        pt_boxes = _pt_confidences = pt_classes = None

								    # ONNX model
								    try:
								        onnx_detections = run_onnx_model(onnx_model_path, preprocessed_img)
								    except Exception as err:
								        print(f"❌ Failed to run ONNX model: {err}")
								        onnx_detections = None

								    print("\n" + banner)
								    print("📊 COMPARISON SUMMARY")
								    print(banner)

								    # Shiny icon is class 50 in both outputs.
								    pt_shiny_count = np.sum(pt_classes == 50) if pt_classes is not None else 0

								    onnx_shiny_count = 0
								    has_onnx_rows = onnx_detections is not None and len(onnx_detections) > 0
								    if has_onnx_rows and onnx_detections.shape[1] == 6:  # NMS format
								        onnx_shiny_count = np.sum(onnx_detections[:, 5].astype(int) == 50)

								    pt_total = 0 if pt_boxes is None else len(pt_boxes)
								    onnx_total = 0 if onnx_detections is None else len(onnx_detections)

								    print("🔥 .pt Model Results:")
								    print(f"   Total detections: {pt_total}")
								    print(f"   Shiny icons (class 50): {pt_shiny_count}")

								    print("\n🔧 ONNX Model Results:")
								    print(f"   Total detections: {onnx_total}")
								    print(f"   Shiny icons (class 50): {onnx_shiny_count}")

								    if pt_shiny_count > 0:
								        if onnx_shiny_count == 0:
								            print(f"\n🚨 ISSUE CONFIRMED: .pt model finds {pt_shiny_count} shiny icons, ONNX finds 0")
								            print("   This confirms the preprocessing/inference discrepancy")
								        elif pt_shiny_count == onnx_shiny_count:
								            print(f"\n✅ Both models find {pt_shiny_count} shiny icons - issue may be elsewhere")

								    print("\n" + banner)


								if __name__ == "__main__":
								    # All paths below are relative, so they resolve against the current
								    # working directory.
								    pt_model = "raw_models/best.pt"

								    # ONNX variants to compare against the .pt model
								    onnx_models = [
								        "app/src/main/assets/best.onnx",
								        "raw_models/exports/best_no_nms.onnx",
								        "raw_models/exports/best_nms_relaxed.onnx",
								        "raw_models/exports/best_nms_very_relaxed.onnx",
								    ]

								    # Locations searched, in order, for a test image with a known shiny icon
								    test_image_candidates = [
								        "test_images/shiny_test.jpg",
								        "test_images/test.jpg",
								        "screenshots/shiny.jpg",
								        "screenshots/test.png",
								    ]

								    print("🔍 Looking for test images...")

								    # First candidate that exists on disk, or None when there is none.
								    test_image_found = next(
								        (candidate for candidate in test_image_candidates if Path(candidate).exists()),
								        None,
								    )

								    if test_image_found is None:
								        print("❌ No test image found. Please provide a test image with shiny icon at one of these paths:")
								        for candidate in test_image_candidates:
								            print(f"   {candidate}")
								        print("\nYou can capture a screenshot with shiny icon and save it as test_images/shiny_test.jpg")
								        # SystemExit instead of the site-provided exit(), which is meant
								        # for interactive sessions and is not always defined.
								        raise SystemExit(1)

								    print(f"   Found test image: {test_image_found}")

								    # Run comparison for each ONNX model that is present
								    for onnx_model in onnx_models:
								        if Path(onnx_model).exists():
								            print(f"\n🔄 Testing ONNX model: {onnx_model}")
								            compare_models(pt_model, onnx_model, test_image_found)
								            print("\n" + "="*120 + "\n")
								        else:
								            print(f"⚠️ ONNX model not found: {onnx_model}")