I am working on an Android app that already uses OpenCV. I have a YOLOv8 model that was converted to ONNX format. Here is its output metadata.
So far I can run the model successfully, but I can't make sense of the output it produces.
This is what printing the output gives:
Mat [ 1* 5* 8400*CV_32FC1, isCont=true, isSubmat=true, nativeObj=0x72345b4840, dataAddr=0x723076b000 ]
/**
 * Runs an ONNX model through OpenCV's DNN module.
 *
 * Call [setupDetector] once before [detect]; until the network has been loaded,
 * [detect] returns without doing anything.
 *
 * @param context used to read the model from the app assets and to locate the cache directory.
 */
class Detector(private val context: Context) {
    private var net: Net? = null

    /**
     * Feeds [frame] through the network and prints the raw output blob.
     *
     * @param frame the image to run inference on; it is scaled to
     * [TENSOR_WIDTH] x [TENSOR_HEIGHT] while the input blob is built.
     */
    fun detect(frame: Bitmap) {
        // Nothing to run against until setupDetector() has loaded the model.
        val network = net ?: return

        // preprocess image: bitmap -> RGBA Mat -> RGB Mat
        val mat = Mat()
        Utils.bitmapToMat(frame, mat)
        Imgproc.cvtColor(mat, mat, Imgproc.COLOR_RGBA2RGB)

        val inputBlob = Dnn.blobFromImage(
            mat,
            1.0 / 255.0, // scale pixel values into [0, 1]
            Size(TENSOR_WIDTH.toDouble(), TENSOR_HEIGHT.toDouble()),
            Scalar(0.0), // no mean subtraction
            false, // swapRB: mat is already RGB after cvtColor, so swapping would feed BGR
            false // crop
        )
        network.setInput(inputBlob)
        val outputBlob = network.forward()
        println(outputBlob)
    }

    /**
     * Loads the network from [MODEL_NAME], first copying the model from the app
     * assets into the cache directory if it is not there yet.
     *
     * @throws RuntimeException if the asset cannot be copied to the cache directory.
     */
    fun setupDetector() {
        val modelFile = File(context.cacheDir, MODEL_NAME)
        if (!modelFile.exists()) {
            try {
                // Stream the whole asset; `use` closes both streams even if the copy fails.
                context.assets.open(MODEL_NAME).use { input ->
                    modelFile.outputStream().use { output -> input.copyTo(output) }
                }
            } catch (e: Exception) {
                // Remove a partially written file so the next call retries the copy
                // instead of loading a truncated model.
                modelFile.delete()
                throw RuntimeException(e)
            }
        }
        net = Dnn.readNetFromONNX(modelFile.absolutePath)
    }

    companion object {
        private const val MODEL_NAME = "model.onnx"
        private const val TENSOR_WIDTH = 640
        private const val TENSOR_HEIGHT = 640
    }
}
What is the general approach for extracting the bounding boxes, confidence scores and class labels? If you have a solution for running an ONNX model with OpenCV, please share it as well. Note that this question isn't Android-specific.
Following the suggestions I got in the comments, I dug into YOLOv8, and this is the solution I came up with.
// Convert the Android bitmap (RGBA) into a 3-channel RGB Mat.
val mat = Mat()
Utils.bitmapToMat(croppedBitmap, mat)
Imgproc.cvtColor(mat, mat, Imgproc.COLOR_RGBA2RGB)

val inputBlob = Dnn.blobFromImage(
    mat,
    1.0 / 255.0, // scale pixel values into [0, 1]
    Size(TENSOR_WIDTH_DOUBLE, TENSOR_HEIGHT_DOUBLE),
    Scalar(0.0), // no mean subtraction
    false, // swapRB: mat is already RGB
    false // crop
)
net?.setInput(inputBlob)
val outputBlob = net?.forward() ?: return

// Collapse the 3-D output (printed above as 1 x 5 x 8400) into a 2-D matrix with
// one row per output channel, then transpose so that each row is one candidate.
val strip = outputBlob.reshape(1, outputBlob.size(1))
val transposedMat = Mat()
Core.transpose(strip, transposedMat)

// Copy the whole matrix out in a single call instead of one JNI round trip
// (and one array allocation) per element. Requires a CV_32F Mat, which is
// what the network output was printed as.
val candidateCount = transposedMat.rows()
val valuesPerCandidate = transposedMat.cols()
val values = FloatArray(candidateCount * valuesPerCandidate)
if (values.isNotEmpty()) {
    transposedMat.get(0, 0, values)
}

val boundingBoxes = mutableListOf<BoundingBox>()
for (i in 0 until candidateCount) {
    val offset = i * valuesPerCandidate
    // Column 4 is used as the confidence score; columns 0..3 are passed to
    // BoundingBox in order (presumably centre x, centre y, width, height).
    val confidence = values[offset + 4].toDouble()
    if (confidence > CONFIDENCE_THRESHOLD) {
        boundingBoxes.add(
            BoundingBox(
                values[offset].toDouble(),
                values[offset + 1].toDouble(),
                values[offset + 2].toDouble(),
                values[offset + 3].toDouble(),
                confidence
            )
        )
    }
}
/**
 * A single detection candidate described by its centre point, size and score.
 *
 * NOTE(review): the coordinate units are not fixed by this type; presumably they
 * are pixels in the network's input space — confirm against the producer.
 *
 * @property centerX x coordinate of the box centre.
 * @property centerY y coordinate of the box centre.
 * @property width box width.
 * @property height box height.
 * @property confidence score attached to this box.
 */
data class BoundingBox(
    val centerX: Double,
    val centerY: Double,
    val width: Double,
    val height: Double,
    val confidence: Double,
)