@ -744,30 +744,19 @@ class YOLOInferenceEngine(
}
/ * *
* Parse NMS ( Non - Maximum Suppression ) output format
* Data class for transformed coordinates
* /
private fun parseNMSOutput ( output : FloatArray , originalWidth : Int , originalHeight : Int , inputScale : Int ) : List < Detection >
{
val detections = mutableListOf < Detection > ( )
val num_detections = 300 // From model output [1, 300, 6]
val features_per_detection = 6 // [x1, y1, x2, y2, confidence, class_id]
Log . d ( TAG , " 🔍 Parsing NMS output: 300 post-processed detections " )
var valid_detections = 0
private data class TransformedCoordinates ( val x1 : Float , val y1 : Float , val x2 : Float , val y2 : Float )
for ( i in 0 until num_detections )
/ * *
* Transform coordinates from model output to original image space
* /
private fun transformCoordinates (
rawX1 : Float , rawY1 : Float , rawX2 : Float , rawY2 : Float ,
originalWidth : Int , originalHeight : Int , inputScale : Int
) : TransformedCoordinates
{
val base_idx = i * features_per_detection
// Extract detection data: [x1, y1, x2, y2, confidence, class_id]
val x1 : Float
val y1 : Float
val x2 : Float
val y2 : Float
when ( COORD_TRANSFORM_MODE )
return when ( COORD_TRANSFORM_MODE )
{
" LETTERBOX " ->
{
@ -777,20 +766,24 @@ class YOLOInferenceEngine(
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
x1 = ( output [ base_idx ] - offset_x ) * scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * scale_y
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * scale_x ,
y1 = ( rawY1 - offset_y ) * scale_y ,
x2 = ( rawX2 - offset_x ) * scale_x ,
y2 = ( rawY2 - offset_y ) * scale_y
)
}
" DIRECT " ->
{
val direct_scale_x = originalWidth . toFloat ( ) / inputScale . toFloat ( )
val direct_scale_y = originalHeight . toFloat ( ) / inputScale . toFloat ( )
x1 = output [ base_idx ] * direct_scale_x
y1 = output [ base_idx + 1 ] * direct_scale_y
x2 = output [ base_idx + 2 ] * direct_scale_x
y2 = output [ base_idx + 3 ] * direct_scale_y
TransformedCoordinates (
x1 = rawX1 * direct_scale_x ,
y1 = rawY1 * direct_scale_y ,
x2 = rawX2 * direct_scale_x ,
y2 = rawY2 * direct_scale_y
)
}
" HYBRID " ->
{
@ -804,14 +797,16 @@ class YOLOInferenceEngine(
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
x1 = ( output [ base_idx ] - offset_x ) * hybrid_scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * hybrid_scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * hybrid_scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * hybrid_scale_y
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * hybrid_scale_x ,
y1 = ( rawY1 - offset_y ) * hybrid_scale_y ,
x2 = ( rawX2 - offset_x ) * hybrid_scale_x ,
y2 = ( rawY2 - offset_y ) * hybrid_scale_y
)
}
else ->
{
// Default to HYBRID
// Default to HYBRID mode for unknown coordinate modes
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
@ -822,12 +817,44 @@ class YOLOInferenceEngine(
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
x1 = ( output [ base_idx ] - offset_x ) * hybrid_scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * hybrid_scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * hybrid_scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * hybrid_scale_y
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * hybrid_scale_x ,
y1 = ( rawY1 - offset_y ) * hybrid_scale_y ,
x2 = ( rawX2 - offset_x ) * hybrid_scale_x ,
y2 = ( rawY2 - offset_y ) * hybrid_scale_y
)
}
}
}
/ * *
* Parse NMS ( Non - Maximum Suppression ) output format
* /
private fun parseNMSOutput ( output : FloatArray , originalWidth : Int , originalHeight : Int , inputScale : Int ) : List < Detection >
{
val detections = mutableListOf < Detection > ( )
val num_detections = 300 // From model output [1, 300, 6]
val features_per_detection = 6 // [x1, y1, x2, y2, confidence, class_id]
Log . d ( TAG , " 🔍 Parsing NMS output: 300 post-processed detections " )
var valid_detections = 0
for ( i in 0 until num_detections )
{
val base_idx = i * features_per_detection
// Extract and transform coordinates from model output
val coords = transformCoordinates (
rawX1 = output [ base_idx ] ,
rawY1 = output [ base_idx + 1 ] ,
rawX2 = output [ base_idx + 2 ] ,
rawY2 = output [ base_idx + 3 ] ,
originalWidth = originalWidth ,
originalHeight = originalHeight ,
inputScale = inputScale
)
val confidence = output [ base_idx + 4 ]
val class_id = output [ base_idx + 5 ] . toInt ( )
@ -866,10 +893,10 @@ class YOLOInferenceEngine(
{
// Convert from corner coordinates (x1,y1,x2,y2) to BoundingBox format
// Clamp coordinates to image boundaries
val clamped_x1 = max ( 0.0f , min ( x1 , originalWidth . toFloat ( ) ) )
val clamped_y1 = max ( 0.0f , min ( y1 , originalHeight . toFloat ( ) ) )
val clamped_x2 = max ( clamped_x1 , min ( x2 , originalWidth . toFloat ( ) ) )
val clamped_y2 = max ( clamped_y1 , min ( y2 , originalHeight . toFloat ( ) ) )
val clamped_x1 = max ( 0.0f , min ( coords . x1 , originalWidth . toFloat ( ) ) )
val clamped_y1 = max ( 0.0f , min ( coords . y1 , originalHeight . toFloat ( ) ) )
val clamped_x2 = max ( clamped_x1 , min ( coords . x2 , originalWidth . toFloat ( ) ) )
val clamped_y2 = max ( clamped_y1 , min ( coords . y2 , originalHeight . toFloat ( ) ) )
// Validate bounding box dimensions and coordinates
if ( clamped_x2 > clamped_x1 && clamped_y2 > clamped_y1 )