I am developing an image segmentation app that runs the live camera feed through my Core ML model. However, the output is slow: the camera view with the masked prediction lags behind the live feed. Below are my Vision manager class, which runs the prediction on the pixel buffer, and the function that calls this class and converts the result to colors before it is shown over the camera output. Has anyone faced this issue before? Do you see an error in my code that could cause the slowness?
Vision Manager Class:
/// Runs the segmentation Core ML model on camera frames through the Vision framework.
class VisionManager: NSObject {
    /// Shared instance used by the camera pipeline.
    static let shared = VisionManager()
    /// Core ML model that the Vision request wraps.
    static let MODEL = ba_224_segm().model

    /// Vision request for `MODEL`; built on first access and reused for every frame.
    private lazy var predictionRequest: VNCoreMLRequest = {
        do {
            let model = try VNCoreMLModel(for: VisionManager.MODEL)
            let request = VNCoreMLRequest(model: model)
            request.imageCropAndScaleOption = .centerCrop
            return request
        } catch {
            // Nothing can be predicted without the model, so fail loudly and include the cause.
            fatalError("can't load Vision ML Model: \(error)")
        }
    }()

    /// Synchronously runs the prediction request on one camera frame.
    ///
    /// - Parameters:
    ///   - pixelBuffer: Image buffer handed to the Vision request handler.
    ///   - sampleBuffer: Sample buffer the frame came from; its camera-intrinsics
    ///     attachment, when present, is forwarded to Vision.
    ///   - onResult: Called before this method returns with the request's observations.
    ///     It is not called when performing the request throws.
    func predict(pixelBuffer: CVImageBuffer, sampleBuffer: CMSampleBuffer, onResult: ((_ observations: [VNCoreMLFeatureValueObservation]) -> Void)) {
        var requestOptions: [VNImageOption: Any] = [:]
        if let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) {
            requestOptions[.cameraIntrinsics] = cameraIntrinsicData
        }

        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: requestOptions)
        do {
            try handler.perform([predictionRequest])
        } catch {
            // A failed frame is recoverable: report it and skip this frame instead of
            // falling through to the result check below, which would crash on nil results.
            print("error handler: \(error)")
            return
        }

        // After a successful perform, any other result type means the model is not
        // the feature-value model this class was written for.
        guard let observations = predictionRequest.results as? [VNCoreMLFeatureValueObservation] else {
            fatalError("unexpected result type from VNCoreMLRequest")
        }
        onResult(observations)
    }
}
Predicted Camera Output function:
/// Runs the segmentation model on one camera frame and delivers the composited overlay.
///
/// - Parameters:
///   - pixelBuffer: Frame to segment; also passed as the background of the merged image.
///   - sampleBuffer: Sample buffer the frame came from, forwarded to `VisionManager`.
///   - onFinish: Called on the main queue with the merged image. It is not called when
///     the prediction yields no multi-array output or one of the masks cannot be built.
func handleCameraOutput(pixelBuffer: CVImageBuffer, sampleBuffer: CMSampleBuffer, onFinish: @escaping ((_ image: UIImage?) -> Void)) {
    // `predict` takes a non-escaping closure, so no `[weak self]` capture list is needed.
    VisionManager.shared.predict(pixelBuffer: pixelBuffer, sampleBuffer: sampleBuffer) { observations in
        // `first` instead of `[0]`: an empty observation list must not trap.
        guard let multiArray = observations.first?.featureValue.multiArrayValue else { return }

        // Wrap the model output once and reuse it for both masks.
        // NOTE(review): assumes the mask helpers only read `maskArray` — confirm.
        let maskArray = MultiArray<Float32>(multiArray)

        // Skip the frame instead of force-unwrapping when a mask cannot be produced.
        guard let newMask = maskEdit.maskToRGBA(maskArray: maskArray, rgba: (Float(r), Float(g), Float(b), Float(a))),
              let newInvertedMask = maskEdit.maskToRGBAInvert(maskArray: maskArray, rgba: (r: 1.0, g: 1.0, b: 1.0, a: 0.4)) else {
            return
        }
        mask = newMask
        maskInverted = newInvertedMask

        let image = maskEdit.mergeMaskAndBackground(invertedMask: newInvertedMask, mask: newMask, background: pixelBuffer, size: Int(size))
        DispatchQueue.main.async {
            onFinish(image)
        }
    }
}
I call these functions in viewDidAppear as below:
// Per-frame handler: refresh the mask colour components, then run the prediction
// and show the merged image in the prediction view.
CameraManager.shared.setDidOutputHandler { [weak self] (output, pixelBuffer, sampleBuffer, connection) in
    // `self` is captured weakly, so it may already be gone when a frame arrives;
    // unwrap once and bail out instead of force-unwrapping on every access.
    guard let self = self else { return }

    // Split the mask colour into its components, then override the alpha with 0.5.
    self.maskColor.getRed(&self.r, green: &self.g, blue: &self.b, alpha: &self.a)
    self.a = 0.5

    self.handleCameraOutput(pixelBuffer: pixelBuffer, sampleBuffer: sampleBuffer, onFinish: { [weak self] image in
        self?.predictionView.image = image
    })
}
I found out that my issue was caused by not using a different thread. Since I am a new developer, I did not know such details, and I am still learning thanks to experts in the field and their shared knowledge. Please see my old and new captureOutput functions below. Using a different thread solved my problem:
old status:
/// Delegate callback: passes each frame that carries an image buffer to `handler`,
/// then to the one-shot `onCapture` callback.
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    if let frame = CMSampleBufferGetImageBuffer(sampleBuffer) {
        // Both callbacks run inline, before this method returns.
        handler?(output, frame, sampleBuffer, connection)
        onCapture?(frame, sampleBuffer)
        // Cleared after every delivered frame, so `onCapture` fires at most once per assignment.
        onCapture = nil
    }
}
and new status:
/// Delegate callback: forwards a camera frame to `handler` on a background queue and
/// drops any frame that arrives while the previous one is still being processed.
public func captureOutput(_ output: AVCaptureOutput,
                          didOutput sampleBuffer: CMSampleBuffer,
                          from connection: AVCaptureConnection) {
    // A non-nil `currentBuffer` means a frame is still in flight, so skip this one.
    // Frames without an image buffer are skipped as well instead of being force-unwrapped later.
    guard currentBuffer == nil,
          let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
    currentBuffer = pixelBuffer

    // NOTE(review): `currentBuffer` is read here and cleared from the global queue
    // without synchronisation — confirm whether that access needs a serial queue or lock.
    DispatchQueue.global(qos: .userInitiated).async { [weak self] in
        guard let self = self else { return }
        // Use the captured local rather than re-reading and force-unwrapping the property.
        self.handler?(output, pixelBuffer, sampleBuffer, connection)
        self.currentBuffer = nil
    }
}