@ -743,6 +743,90 @@ class YOLOInferenceEngine(
return parseNMSOutput ( flat_output , originalSize . width . toInt ( ) , originalSize . height . toInt ( ) , INPUT_SIZE )
}
/ * *
* Data class for transformed coordinates
* /
private data class TransformedCoordinates ( val x1 : Float , val y1 : Float , val x2 : Float , val y2 : Float )
/ * *
* Transform coordinates from model output to original image space
* /
private fun transformCoordinates (
rawX1 : Float , rawY1 : Float , rawX2 : Float , rawY2 : Float ,
originalWidth : Int , originalHeight : Int , inputScale : Int
) : TransformedCoordinates
{
return when ( COORD_TRANSFORM_MODE )
{
" LETTERBOX " ->
{
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val scale_x = letterbox_params [ 0 ]
val scale_y = letterbox_params [ 1 ]
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * scale_x ,
y1 = ( rawY1 - offset_y ) * scale_y ,
x2 = ( rawX2 - offset_x ) * scale_x ,
y2 = ( rawY2 - offset_y ) * scale_y
)
}
" DIRECT " ->
{
val direct_scale_x = originalWidth . toFloat ( ) / inputScale . toFloat ( )
val direct_scale_y = originalHeight . toFloat ( ) / inputScale . toFloat ( )
TransformedCoordinates (
x1 = rawX1 * direct_scale_x ,
y1 = rawY1 * direct_scale_y ,
x2 = rawX2 * direct_scale_x ,
y2 = rawY2 * direct_scale_y
)
}
" HYBRID " ->
{
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
val scale = minOf ( inputScale . toDouble ( ) / originalWidth , inputScale . toDouble ( ) / originalHeight )
val scaled_width = ( originalWidth * scale )
val scaled_height = ( originalHeight * scale )
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * hybrid_scale_x ,
y1 = ( rawY1 - offset_y ) * hybrid_scale_y ,
x2 = ( rawX2 - offset_x ) * hybrid_scale_x ,
y2 = ( rawY2 - offset_y ) * hybrid_scale_y
)
}
else ->
{
// Default to HYBRID mode for unknown coordinate modes
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
val scale = minOf ( inputScale . toDouble ( ) / originalWidth , inputScale . toDouble ( ) / originalHeight )
val scaled_width = ( originalWidth * scale )
val scaled_height = ( originalHeight * scale )
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
TransformedCoordinates (
x1 = ( rawX1 - offset_x ) * hybrid_scale_x ,
y1 = ( rawY1 - offset_y ) * hybrid_scale_y ,
x2 = ( rawX2 - offset_x ) * hybrid_scale_x ,
y2 = ( rawY2 - offset_y ) * hybrid_scale_y
)
}
}
}
/ * *
* Parse NMS ( Non - Maximum Suppression ) output format
* /
@ -761,73 +845,16 @@ class YOLOInferenceEngine(
{
val base_idx = i * features_per_detection
// Extract detection data: [x1, y1, x2, y2, confidence, class_id]
val x1 : Float
val y1 : Float
val x2 : Float
val y2 : Float
when ( COORD_TRANSFORM_MODE )
{
" LETTERBOX " ->
{
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val scale_x = letterbox_params [ 0 ]
val scale_y = letterbox_params [ 1 ]
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
x1 = ( output [ base_idx ] - offset_x ) * scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * scale_y
}
" DIRECT " ->
{
val direct_scale_x = originalWidth . toFloat ( ) / inputScale . toFloat ( )
val direct_scale_y = originalHeight . toFloat ( ) / inputScale . toFloat ( )
x1 = output [ base_idx ] * direct_scale_x
y1 = output [ base_idx + 1 ] * direct_scale_y
x2 = output [ base_idx + 2 ] * direct_scale_x
y2 = output [ base_idx + 3 ] * direct_scale_y
}
" HYBRID " ->
{
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
val scale = minOf ( inputScale . toDouble ( ) / originalWidth , inputScale . toDouble ( ) / originalHeight )
val scaled_width = ( originalWidth * scale )
val scaled_height = ( originalHeight * scale )
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
x1 = ( output [ base_idx ] - offset_x ) * hybrid_scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * hybrid_scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * hybrid_scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * hybrid_scale_y
}
else ->
{
// Default to HYBRID
val letterbox_params = calculateLetterboxInverse ( originalWidth , originalHeight , inputScale )
val offset_x = letterbox_params [ 2 ]
val offset_y = letterbox_params [ 3 ]
val scale = minOf ( inputScale . toDouble ( ) / originalWidth , inputScale . toDouble ( ) / originalHeight )
val scaled_width = ( originalWidth * scale )
val scaled_height = ( originalHeight * scale )
val hybrid_scale_x = originalWidth . toFloat ( ) / scaled_width . toFloat ( )
val hybrid_scale_y = originalHeight . toFloat ( ) / scaled_height . toFloat ( )
x1 = ( output [ base_idx ] - offset_x ) * hybrid_scale_x
y1 = ( output [ base_idx + 1 ] - offset_y ) * hybrid_scale_y
x2 = ( output [ base_idx + 2 ] - offset_x ) * hybrid_scale_x
y2 = ( output [ base_idx + 3 ] - offset_y ) * hybrid_scale_y
}
}
// Extract and transform coordinates from model output
val coords = transformCoordinates (
rawX1 = output [ base_idx ] ,
rawY1 = output [ base_idx + 1 ] ,
rawX2 = output [ base_idx + 2 ] ,
rawY2 = output [ base_idx + 3 ] ,
originalWidth = originalWidth ,
originalHeight = originalHeight ,
inputScale = inputScale
)
val confidence = output [ base_idx + 4 ]
val class_id = output [ base_idx + 5 ] . toInt ( )
@ -866,10 +893,10 @@ class YOLOInferenceEngine(
{
// Convert from corner coordinates (x1,y1,x2,y2) to BoundingBox format
// Clamp coordinates to image boundaries
val clamped_x1 = max ( 0.0f , min ( x1 , originalWidth . toFloat ( ) ) )
val clamped_y1 = max ( 0.0f , min ( y1 , originalHeight . toFloat ( ) ) )
val clamped_x2 = max ( clamped_x1 , min ( x2 , originalWidth . toFloat ( ) ) )
val clamped_y2 = max ( clamped_y1 , min ( y2 , originalHeight . toFloat ( ) ) )
val clamped_x1 = max ( 0.0f , min ( coords . x1 , originalWidth . toFloat ( ) ) )
val clamped_y1 = max ( 0.0f , min ( coords . y1 , originalHeight . toFloat ( ) ) )
val clamped_x2 = max ( clamped_x1 , min ( coords . x2 , originalWidth . toFloat ( ) ) )
val clamped_y2 = max ( clamped_y1 , min ( coords . y2 , originalHeight . toFloat ( ) ) )
// Validate bounding box dimensions and coordinates
if ( clamped_x2 > clamped_x1 && clamped_y2 > clamped_y1 )