diff --git a/app/src/main/java/com/quillstudios/pokegoalshelper/ml/YOLOInferenceEngine.kt b/app/src/main/java/com/quillstudios/pokegoalshelper/ml/YOLOInferenceEngine.kt index 89e09f6..4294a11 100644 --- a/app/src/main/java/com/quillstudios/pokegoalshelper/ml/YOLOInferenceEngine.kt +++ b/app/src/main/java/com/quillstudios/pokegoalshelper/ml/YOLOInferenceEngine.kt @@ -743,6 +743,90 @@ class YOLOInferenceEngine( return parseNMSOutput(flat_output, originalSize.width.toInt(), originalSize.height.toInt(), INPUT_SIZE) } + /** + * Data class for transformed coordinates + */ + private data class TransformedCoordinates(val x1: Float, val y1: Float, val x2: Float, val y2: Float) + + /** + * Transform coordinates from model output to original image space + */ + private fun transformCoordinates( + rawX1: Float, rawY1: Float, rawX2: Float, rawY2: Float, + originalWidth: Int, originalHeight: Int, inputScale: Int + ): TransformedCoordinates + { + return when (COORD_TRANSFORM_MODE) + { + "LETTERBOX" -> + { + val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) + val scale_x = letterbox_params[0] + val scale_y = letterbox_params[1] + val offset_x = letterbox_params[2] + val offset_y = letterbox_params[3] + + TransformedCoordinates( + x1 = (rawX1 - offset_x) * scale_x, + y1 = (rawY1 - offset_y) * scale_y, + x2 = (rawX2 - offset_x) * scale_x, + y2 = (rawY2 - offset_y) * scale_y + ) + } + "DIRECT" -> + { + val direct_scale_x = originalWidth.toFloat() / inputScale.toFloat() + val direct_scale_y = originalHeight.toFloat() / inputScale.toFloat() + + TransformedCoordinates( + x1 = rawX1 * direct_scale_x, + y1 = rawY1 * direct_scale_y, + x2 = rawX2 * direct_scale_x, + y2 = rawY2 * direct_scale_y + ) + } + "HYBRID" -> + { + val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) + val offset_x = letterbox_params[2] + val offset_y = letterbox_params[3] + + val scale = minOf(inputScale.toDouble() / originalWidth, inputScale.toDouble() / originalHeight) + val scaled_width = (originalWidth * scale) + val scaled_height = (originalHeight * scale) + val hybrid_scale_x = originalWidth.toFloat() / scaled_width.toFloat() + val hybrid_scale_y = originalHeight.toFloat() / scaled_height.toFloat() + + TransformedCoordinates( + x1 = (rawX1 - offset_x) * hybrid_scale_x, + y1 = (rawY1 - offset_y) * hybrid_scale_y, + x2 = (rawX2 - offset_x) * hybrid_scale_x, + y2 = (rawY2 - offset_y) * hybrid_scale_y + ) + } + else -> + { + // Default to HYBRID mode for unknown coordinate modes + val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) + val offset_x = letterbox_params[2] + val offset_y = letterbox_params[3] + + val scale = minOf(inputScale.toDouble() / originalWidth, inputScale.toDouble() / originalHeight) + val scaled_width = (originalWidth * scale) + val scaled_height = (originalHeight * scale) + val hybrid_scale_x = originalWidth.toFloat() / scaled_width.toFloat() + val hybrid_scale_y = originalHeight.toFloat() / scaled_height.toFloat() + + TransformedCoordinates( + x1 = (rawX1 - offset_x) * hybrid_scale_x, + y1 = (rawY1 - offset_y) * hybrid_scale_y, + x2 = (rawX2 - offset_x) * hybrid_scale_x, + y2 = (rawY2 - offset_y) * hybrid_scale_y + ) + } + } + } + /** * Parse NMS (Non-Maximum Suppression) output format */ @@ -761,73 +845,16 @@ class YOLOInferenceEngine( { val base_idx = i * features_per_detection - // Extract detection data: [x1, y1, x2, y2, confidence, class_id] - val x1: Float - val y1: Float - val x2: Float - val y2: Float - - when (COORD_TRANSFORM_MODE) - { - "LETTERBOX" -> - { - val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) - val scale_x = letterbox_params[0] - val scale_y = letterbox_params[1] - val offset_x = letterbox_params[2] - val offset_y = letterbox_params[3] - - x1 = (output[base_idx] - offset_x) * scale_x - y1 = (output[base_idx + 1] - offset_y) * scale_y - x2 = (output[base_idx + 2] - offset_x) * scale_x - y2 = (output[base_idx + 3] - offset_y) * scale_y - } - "DIRECT" -> - { - val direct_scale_x = originalWidth.toFloat() / inputScale.toFloat() - val direct_scale_y = originalHeight.toFloat() / inputScale.toFloat() - - x1 = output[base_idx] * direct_scale_x - y1 = output[base_idx + 1] * direct_scale_y - x2 = output[base_idx + 2] * direct_scale_x - y2 = output[base_idx + 3] * direct_scale_y - } - "HYBRID" -> - { - val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) - val offset_x = letterbox_params[2] - val offset_y = letterbox_params[3] - - val scale = minOf(inputScale.toDouble() / originalWidth, inputScale.toDouble() / originalHeight) - val scaled_width = (originalWidth * scale) - val scaled_height = (originalHeight * scale) - val hybrid_scale_x = originalWidth.toFloat() / scaled_width.toFloat() - val hybrid_scale_y = originalHeight.toFloat() / scaled_height.toFloat() - - x1 = (output[base_idx] - offset_x) * hybrid_scale_x - y1 = (output[base_idx + 1] - offset_y) * hybrid_scale_y - x2 = (output[base_idx + 2] - offset_x) * hybrid_scale_x - y2 = (output[base_idx + 3] - offset_y) * hybrid_scale_y - } - else -> - { - // Default to HYBRID - val letterbox_params = calculateLetterboxInverse(originalWidth, originalHeight, inputScale) - val offset_x = letterbox_params[2] - val offset_y = letterbox_params[3] - - val scale = minOf(inputScale.toDouble() / originalWidth, inputScale.toDouble() / originalHeight) - val scaled_width = (originalWidth * scale) - val scaled_height = (originalHeight * scale) - val hybrid_scale_x = originalWidth.toFloat() / scaled_width.toFloat() - val hybrid_scale_y = originalHeight.toFloat() / scaled_height.toFloat() - - x1 = (output[base_idx] - offset_x) * hybrid_scale_x - y1 = (output[base_idx + 1] - offset_y) * hybrid_scale_y - x2 = (output[base_idx + 2] - offset_x) * hybrid_scale_x - y2 = (output[base_idx + 3] - offset_y) * hybrid_scale_y - } - } + // Extract and transform coordinates from model output + val coords = transformCoordinates( + rawX1 = output[base_idx], + rawY1 = output[base_idx + 1], + rawX2 = output[base_idx + 2], + rawY2 = output[base_idx + 3], + originalWidth = originalWidth, + originalHeight = originalHeight, + inputScale = inputScale + ) val confidence = output[base_idx + 4] val class_id = output[base_idx + 5].toInt() @@ -866,10 +893,10 @@ class YOLOInferenceEngine( { // Convert from corner coordinates (x1,y1,x2,y2) to BoundingBox format // Clamp coordinates to image boundaries - val clamped_x1 = max(0.0f, min(x1, originalWidth.toFloat())) - val clamped_y1 = max(0.0f, min(y1, originalHeight.toFloat())) - val clamped_x2 = max(clamped_x1, min(x2, originalWidth.toFloat())) - val clamped_y2 = max(clamped_y1, min(y2, originalHeight.toFloat())) + val clamped_x1 = max(0.0f, min(coords.x1, originalWidth.toFloat())) + val clamped_y1 = max(0.0f, min(coords.y1, originalHeight.toFloat())) + val clamped_x2 = max(clamped_x1, min(coords.x2, originalWidth.toFloat())) + val clamped_y2 = max(clamped_y1, min(coords.y2, originalHeight.toFloat())) // Validate bounding box dimensions and coordinates if (clamped_x2 > clamped_x1 && clamped_y2 > clamped_y1)