@@ -21,7 +21,7 @@ class YOLOOnnxDetector(private val context: Context) {
private const val TAG = "YOLOOnnxDetector"
private const val MODEL_FILE = "best.onnx"
private const val INPUT_SIZE = 640
-private const val CONFIDENCE_THRESHOLD = 0.65f // Higher threshold to filter enhanced detections
+private const val CONFIDENCE_THRESHOLD = 0.45f // Lowered to match .pt model detection levels
private const val NMS_THRESHOLD = 0.3f // More aggressive merging of overlapping boxes
private const val NUM_CHANNELS = 3
private const val NUM_DETECTIONS = 8400 // YOLOv8 default
@@ -35,11 +35,22 @@ class YOLOOnnxDetector(private val context: Context) {
// Coordinate transformation modes - HYBRID is the correct method
var COORD_TRANSFORM_MODE = "HYBRID" // HYBRID and LETTERBOX work correctly

// Class filtering for debugging
var DEBUG_CLASS_FILTER: String? = null // Set to class name to show only that class
var SHOW_ALL_CONFIDENCES = false // Show all detections with their confidences

fun setCoordinateMode(mode: String) {
    COORD_TRANSFORM_MODE = mode
    Log.i(TAG, "🔧 Coordinate transform mode changed to: $mode")
}

fun toggleShowAllConfidences() {
    SHOW_ALL_CONFIDENCES = !SHOW_ALL_CONFIDENCES
    Log.i(TAG, "📊 Show all confidences: $SHOW_ALL_CONFIDENCES")
}

// Preprocessing enhancement techniques
private const val ENABLE_CONTRAST_ENHANCEMENT = true
private const val ENABLE_SHARPENING = true
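// Usage sketch for the debug toggles above; the wrapper function is illustrative
// and not part of this diff. These appear to sit in the companion object alongside
// the constants (note the extra closing brace before ortSession below), so they
// are called on the type rather than on an instance.
fun applyDebugSettings() {
    YOLOOnnxDetector.setCoordinateMode("LETTERBOX")   // "HYBRID" (default), "LETTERBOX", or "DIRECT"
    YOLOOnnxDetector.toggleShowAllConfidences()       // flip per-detection confidence logging
    YOLOOnnxDetector.setClassFilter("person")         // added in the next hunk; "person" is a placeholder class name
}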
@@ -49,6 +60,15 @@ class YOLOOnnxDetector(private val context: Context) {
// Confidence threshold optimization for mobile ONNX vs raw processing
private const val ENABLE_CONFIDENCE_MAPPING = true
private const val RAW_TO_MOBILE_SCALE = 0.75f // Based on observation that mobile shows lower conf

fun setClassFilter(className: String?) {
    DEBUG_CLASS_FILTER = className
    if (className != null) {
        Log.i(TAG, "🔍 Class filter set to: '$className' (ID will be shown in debug output)")
    } else {
        Log.i(TAG, "🔍 Class filter set to: ALL CLASSES")
    }
}
}

private var ortSession: OrtSession? = null
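// The mapping constants above imply that raw ONNX confidences are rescaled before
// thresholding; the actual mappedConfidence computation is elided from this diff,
// so the linear rescale below is an assumption, not the author's code.
private fun mapConfidence(rawConfidence: Float): Float {
    if (!ENABLE_CONFIDENCE_MAPPING) return rawConfidence
    // Per the RAW_TO_MOBILE_SCALE comment, mobile ONNX scores come out roughly 25%
    // lower than the .pt baseline, so scale them back up and cap at 1.0.
    return (rawConfidence / RAW_TO_MOBILE_SCALE).coerceAtMost(1.0f)
}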
@@ -640,23 +660,18 @@ class YOLOOnnxDetector(private val context: Context) {
// Each preprocessing method creates different coordinate space - use method-specific transform
return when (method) {
    "ultralytics" -> {
        Log.d(TAG, "🔧 Method: $method → Using LETTERBOX transform (letterbox preprocessing)")
        parseNMSOutputWithTransform(output, originalWidth, originalHeight, inputScale, "LETTERBOX")
    }
    "enhanced" -> {
        Log.d(TAG, "🔧 Method: $method → Using DIRECT transform (simple resize)")
        parseNMSOutputWithTransform(output, originalWidth, originalHeight, inputScale, "DIRECT")
    }
    "sharpened" -> {
        Log.d(TAG, "🔧 Method: $method → Using DIRECT transform (simple resize)")
        parseNMSOutputWithTransform(output, originalWidth, originalHeight, inputScale, "DIRECT")
    }
    "original" -> {
        Log.d(TAG, "🔧 Method: $method → Using DIRECT transform (simple resize)")
        parseNMSOutputWithTransform(output, originalWidth, originalHeight, inputScale, "DIRECT")
    }
    else -> {
        Log.d(TAG, "🔧 Method: $method → Using HYBRID transform (fallback)")
        parseNMSOutputWithTransform(output, originalWidth, originalHeight, inputScale, "HYBRID")
    }
}
@@ -667,7 +682,6 @@ class YOLOOnnxDetector(private val context: Context) {
val numDetections = 300 // From model output [1, 300, 6]
val featuresPerDetection = 6 // [x1, y1, x2, y2, confidence, class_id]

Log.d(TAG, "🔍 Parsing NMS output with $transformMode transform")

var validDetections = 0
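// Sketch of how the [1, 300, 6] output could be walked, assuming it has been
// flattened into a FloatArray named `output` (the extraction from the
// OrtSession result is elided from this diff):
for (i in 0 until numDetections) {
    val base = i * featuresPerDetection
    val x1 = output[base]
    val y1 = output[base + 1]
    val x2 = output[base + 2]
    val y2 = output[base + 3]
    val confidence = output[base + 4]
    val classId = output[base + 5].toInt()
    // ...transform, filter, and collect as shown in the hunks that follow
}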
@@ -748,8 +762,23 @@ class YOLOOnnxDetector(private val context: Context) {
confidence
}

-// Filter by confidence threshold and validate coordinates
-if (mappedConfidence > CONFIDENCE_THRESHOLD && classId >= 0 && classId < classNames.size) {
+// Get class name for filtering and debugging
+val className = if (classId >= 0 && classId < classNames.size) {
+    classNames[classId] ?: "unknown_$classId"
+} else {
+    "unknown_$classId"
+}
+
+// Debug logging for all detections if enabled
+if (SHOW_ALL_CONFIDENCES && mappedConfidence > 0.1f) {
+    Log.d(TAG, "🔍 [DEBUG] Class: $className (ID: $classId), Confidence: %.3f, Original: %.3f".format(mappedConfidence, confidence))
+}
+
+// Apply class filtering if set
+val passesClassFilter = DEBUG_CLASS_FILTER == null || DEBUG_CLASS_FILTER == className
+
+// Filter by confidence threshold, class filter, and validate coordinates
+if (mappedConfidence > CONFIDENCE_THRESHOLD && classId >= 0 && classId < classNames.size && passesClassFilter) {
// Convert from corner coordinates (x1,y1,x2,y2) to x,y,w,h format
// Clamp coordinates to image boundaries
val clampedX1 = kotlin.math.max(0.0f, kotlin.math.min(x1, originalWidth.toFloat()))
@@ -766,11 +795,6 @@ class YOLOOnnxDetector(private val context: Context) {
if (width > 0 && height > 0 && x >= 0 && y >= 0 &&
x < originalWidth && y < originalHeight &&
(x + width) <= originalWidth && (y + height) <= originalHeight) {
-val className = if (classId >= 0 && classId < classNames.size) {
-    classNames[classId] ?: "unknown_$classId"
-} else {
-    "unknown_$classId"
-}

detections.add(
Detection(
@@ -784,13 +808,11 @@ class YOLOOnnxDetector(private val context: Context) {
validDetections++

if (validDetections <= 3) {
Log.d(TAG, "✅ Valid detection ($transformMode): $className (${String.format("%.3f", mappedConfidence)}) → [$x, $y, $width, $height]")
}
}
}
}

Log.d(TAG, "🎯 $transformMode parsing complete: $validDetections valid detections")
return detections.sortedByDescending { it.confidence }
}
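// The step from the clamped corners to the (x, y, width, height) box checked
// above is elided by the hunk; a sketch of the presumed conversion, with the
// clampedY1/clampedX2/clampedY2 counterparts and integer box types assumed:
val x = clampedX1.toInt()
val y = clampedY1.toInt()
val width = (clampedX2 - clampedX1).toInt()
val height = (clampedY2 - clampedY1).toInt()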
@@ -961,10 +983,9 @@ class YOLOOnnxDetector(private val context: Context) {
}
}

-Log.d(TAG, "📐 Coordinate analysis: min=${String.format("%.1f", minCoord)}, max=${String.format("%.1f", maxCoord)}")
+//Log.d(TAG, "📐 Coordinate analysis: min=${String.format("%.1f", minCoord)}, max=${String.format("%.1f", maxCoord)}")

// Use the configurable coordinate transformation mode
Log.d(TAG, "📐 Using coordinate transform mode: $COORD_TRANSFORM_MODE")

when (COORD_TRANSFORM_MODE) {
    "LETTERBOX" -> {
@@ -973,12 +994,10 @@ class YOLOOnnxDetector(private val context: Context) {
        val scaleY = letterboxParams[1]
        val offsetX = letterboxParams[2]
        val offsetY = letterboxParams[3]
-       Log.d(TAG, "📐 LETTERBOX transform: scaleX=${String.format("%.3f", scaleX)}, scaleY=${String.format("%.3f", scaleY)}, offsetX=${String.format("%.1f", offsetX)}, offsetY=${String.format("%.1f", offsetY)}")
    }
    "DIRECT" -> {
        val directScaleX = originalWidth.toFloat() / inputScale.toFloat()
        val directScaleY = originalHeight.toFloat() / inputScale.toFloat()
-       Log.d(TAG, "📐 DIRECT transform: scaleX=${String.format("%.3f", directScaleX)}, scaleY=${String.format("%.3f", directScaleY)}")
    }
    "HYBRID" -> {
        val scale = minOf(inputScale.toDouble() / originalWidth, inputScale.toDouble() / originalHeight)
@@ -986,7 +1005,6 @@ class YOLOOnnxDetector(private val context: Context) {
        val scaledHeight = (originalHeight * scale)
        val hybridScaleX = originalWidth.toFloat() / scaledWidth.toFloat()
        val hybridScaleY = originalHeight.toFloat() / scaledHeight.toFloat()
-       Log.d(TAG, "📐 HYBRID transform: scaledSize=${String.format("%.1fx%.1f", scaledWidth, scaledHeight)}, hybridScale=${String.format("%.3fx%.3f", hybridScaleX, hybridScaleY)}")
    }
}
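// How the three modes map a model-space coordinate back to the original image:
// the LETTERBOX form matches the sanity-check transform later in this diff, while
// the DIRECT and HYBRID forms are inferred from the scale factors computed above.
// The helper itself is illustrative and not part of the file.
fun mapBackX(xModel: Float, mode: String, scaleX: Float, offsetX: Float): Float =
    when (mode) {
        "LETTERBOX" -> (xModel - offsetX) * scaleX  // undo the padding offset, then undo the uniform resize
        else -> xModel * scaleX                     // DIRECT and HYBRID are pure scaling (offset unused)
    }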
@@ -1076,8 +1094,23 @@ class YOLOOnnxDetector(private val context: Context) {
confidence
}

-// Filter by confidence threshold and validate coordinates
-if (mappedConfidence > CONFIDENCE_THRESHOLD && classId >= 0 && classId < classNames.size) {
+// Get class name for filtering and debugging
+val className = if (classId >= 0 && classId < classNames.size) {
+    classNames[classId] ?: "unknown_$classId"
+} else {
+    "unknown_$classId"
+}
+
+// Debug logging for all detections if enabled
+if (SHOW_ALL_CONFIDENCES && mappedConfidence > 0.1f) {
+    Log.d(TAG, "🔍 [DEBUG] Class: $className (ID: $classId), Confidence: %.3f, Original: %.3f".format(mappedConfidence, confidence))
+}
+
+// Apply class filtering if set
+val passesClassFilter = DEBUG_CLASS_FILTER == null || DEBUG_CLASS_FILTER == className
+
+// Filter by confidence threshold, class filter, and validate coordinates
+if (mappedConfidence > CONFIDENCE_THRESHOLD && classId >= 0 && classId < classNames.size && passesClassFilter) {
// Convert from corner coordinates (x1,y1,x2,y2) to x,y,w,h format
// Clamp coordinates to image boundaries
val clampedX1 = kotlin.math.max(0.0f, kotlin.math.min(x1, originalWidth.toFloat()))
@@ -1099,7 +1132,6 @@ class YOLOOnnxDetector(private val context: Context) {
if (width > 0 && height > 0 && x >= 0 && y >= 0 &&
x < originalWidth && y < originalHeight &&
(x + width) <= originalWidth && (y + height) <= originalHeight) {
-val className = classNames[classId] ?: "unknown_$classId"

detections.add(
Detection(
@@ -1243,7 +1275,7 @@ class YOLOOnnxDetector(private val context: Context) {
resized.release()
roiMat.release()

-Log.d(TAG, "📐 Letterbox: ${originalWidth}x${originalHeight} → ${newWidth}x${newHeight} → ${targetWidth}x${targetHeight} (scale: ${String.format("%.3f", scale)})")
+//Log.d(TAG, "📐 Letterbox: ${originalWidth}x${originalHeight} → ${newWidth}x${newHeight} → ${targetWidth}x${targetHeight} (scale: ${String.format("%.3f", scale)})")

return letterboxed
}
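// Sketch of the letterbox steps the (now silenced) log above reports, assuming the
// OpenCV Java bindings (Mat, Size, Imgproc, Core, Scalar) already used in this
// function; `src` (the input frame) and the 114-gray padding are assumptions, and
// variable names mirror the log rather than the elided implementation.
val scale = minOf(targetWidth.toDouble() / originalWidth, targetHeight.toDouble() / originalHeight)
val newWidth = (originalWidth * scale).toInt()
val newHeight = (originalHeight * scale).toInt()
val resized = Mat()
Imgproc.resize(src, resized, Size(newWidth.toDouble(), newHeight.toDouble()))
val padX = targetWidth - newWidth
val padY = targetHeight - newHeight
val letterboxed = Mat()
// Pad out to the square input size so the aspect ratio the model saw in training is preserved.
Core.copyMakeBorder(
    resized, letterboxed,
    padY / 2, padY - padY / 2, padX / 2, padX - padX / 2,
    Core.BORDER_CONSTANT, Scalar(114.0, 114.0, 114.0)
)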
@@ -1260,7 +1292,7 @@ class YOLOOnnxDetector(private val context: Context) {
inputScale.toDouble() / originalHeight
)

-Log.d(TAG, "📐 Scale calculation: min(${inputScale}/${originalWidth}, ${inputScale}/${originalHeight}) = min(${String.format("%.4f", inputScale.toDouble() / originalWidth)}, ${String.format("%.4f", inputScale.toDouble() / originalHeight)}) = ${String.format("%.4f", scale)}")
+//Log.d(TAG, "📐 Scale calculation: min(${inputScale}/${originalWidth}, ${inputScale}/${originalHeight}) = min(${String.format("%.4f", inputScale.toDouble() / originalWidth)}, ${String.format("%.4f", inputScale.toDouble() / originalHeight)}) = ${String.format("%.4f", scale)}")

// Calculate the scaled dimensions (what the image became after resize but before padding)
val scaledWidth = (originalWidth * scale)
@@ -1277,15 +1309,14 @@ class YOLOOnnxDetector(private val context: Context) {
val scaleBackX = 1.0 / scale // Same for both X and Y since letterbox uses uniform scaling
val scaleBackY = 1.0 / scale

-Log.d(TAG, "📐 Letterbox inverse: original=${originalWidth}x${originalHeight}, scaled=${String.format("%.1f", scaledWidth)}x${String.format("%.1f", scaledHeight)}")
-Log.d(TAG, "📐 Letterbox inverse: offset=(${String.format("%.1f", offsetX)}, ${String.format("%.1f", offsetY)}), scale=(${String.format("%.3f", scaleBackX)}, ${String.format("%.3f", scaleBackY)})")
+//Log.d(TAG, "📐 Letterbox inverse: original=${originalWidth}x${originalHeight}, scaled=${String.format("%.1f", scaledWidth)}x${String.format("%.1f", scaledHeight)}")
+//Log.d(TAG, "📐 Letterbox inverse: offset=(${String.format("%.1f", offsetX)}, ${String.format("%.1f", offsetY)}), scale=(${String.format("%.3f", scaleBackX)}, ${String.format("%.3f", scaleBackY)})")

// Sanity check: verify transformation with a known point
val testX = 100.0 // Test point in letterboxed space
val testY = 100.0
val transformedX = (testX - offsetX) * scaleBackX
val transformedY = (testY - offsetY) * scaleBackY
Log.d(TAG, "📐 Test transform: (100,100) in letterbox → (${String.format("%.1f", transformedX)}, ${String.format("%.1f", transformedY)}) in original")

return arrayOf(scaleBackX.toFloat(), scaleBackY.toFloat(), offsetX.toFloat(), offsetY.toFloat())
}
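// Consuming the returned parameters: indices 0/1 are the inverse scales and 2/3 the
// padding offsets, applied exactly like the test transform above. The helper name
// is illustrative and not part of the file.
fun letterboxPointToOriginal(x: Float, y: Float, params: Array<Float>): Pair<Float, Float> {
    val (scaleBackX, scaleBackY, offsetX, offsetY) = params
    return Pair((x - offsetX) * scaleBackX, (y - offsetY) * scaleBackY)
}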