
Switch between Core ML model files


I've tried almost everything: @EnvironmentObject, @StateObject, global variables, etc.

I have an enum to track and return the names of the models I have:

enum MLNameModels: String, CaseIterable {
    case audi = "Audi"
    case bmw = "BMW"
    var mlModel: MLModel {
        switch self {
        case .audi:
            return try! Audi(configuration: MLModelConfiguration()).model
        case .bmw:
            return try! BMW(configuration: MLModelConfiguration()).model
        }
    }
}
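
For context, this is roughly how a stored name maps back to a case and a Vision model (a minimal sketch; MLNameModels(rawValue:) returns nil if the string doesn't match a case):

if let selected = MLNameModels(rawValue: "BMW"),
   let visionModel = try? VNCoreMLModel(for: selected.mlModel) {
    // visionModel can now back a VNCoreMLRequest for the selected manufacturer
}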

I am using (well, trying to use) live detection when looking around, and I have a CameraView that handles all the setup of the view and even the Vision framework.

struct CameraView : UIViewControllerRepresentable {
    func makeUIViewController(context: UIViewControllerRepresentableContext<CameraView>) -> UIViewController {
        let controller = CameraViewController()

        return controller
    }
    
    func updateUIViewController(_ uiViewController: CameraView.UIViewControllerType, context: UIViewControllerRepresentableContext<CameraView>) { }
}

There's one line in my CameraViewController that sets up the object detection:

var objectDetector = Object_Detector(modelWithName: "audi")

I know it says Audi, but that's because I just reverted back to what was working.

Inside the object detector class is the following:

class Object_Detector {
    
    // MARK: Properties
    var requests = [VNRequest]()
    
    var boundingBox = CGRect()
    var objectType: ObservationTypeEnum?
    var firstObservation = VNRecognizedObjectObservation()
    
    init(modelWithName modelName: String) {
        self.setupModel(withFilename: modelName)
    }
    
    // MARK: Methods
    private func setupModel(withFilename modelName: String) {
        // Get model URL
        guard let modelURL = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") else {
            NSLog("Error: Unable to find model with name\(modelName), in \(Bundle.main.bundlePath)")
            
            return
        }
        
        // Create desired model
        guard let model = try? VNCoreMLModel(for: MLModel(contentsOf: modelURL)) else {
            NSLog("Error: Failed to create model->line:\(#line)")
            
            return
        }
        
        // Perform a request using ML Model
        let objectRecognizerRequests = VNCoreMLRequest(model: model) { (request, err) in
            if let error = err {
                NSLog("Error: \(error.localizedDescription)")
                
                return
            }
            
            // Get observation results
            guard let results = request.results as? [VNRecognizedObjectObservation] else {
                NSLog("Error: Failed to extract request results as [VNRecognizedObjectObservation]")
                
                return
            }
            
            // Get first observation result (one with the greatest confidence)
            guard let firstResult = results.first else { return }
            
            self.firstObservation = firstResult
            self.objectType = ObservationTypeEnum(fromRawValue: firstResult.labels.first!.identifier)
            self.boundingBox = firstResult.boundingBox
        }
        
        // Save requests
        self.requests = [objectRecognizerRequests]
    }
}

I have tested to see whether changes occur, and I even call the setupModel() function after selecting an option, which does indicate that the modelName parameter has been updated. Yet for some reason my app seems to jump straight to the last model, in this case BMW.

I first prompt users to select a manufacturer, and after that nothing seems to work. It only works when I hard-code the value, as shown above.

To clarify, I don't want to merge results or anything like that. I simply want the app to know which manufacturer the user has selected, grab the corresponding model, and proceed with recognising.
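
In other words, the behaviour I'm after is roughly this (a hypothetical sketch; userPicked is not a real function in my project):

// The user's selection should decide which model detection runs on
func userPicked(_ manufacturer: MLNameModels) {
    // e.g. .bmw when the user taps "BMW" in the sheet
    objectDetector = Object_Detector(modelWithName: manufacturer.rawValue)
    // ...and recognition should then continue with the newly chosen model
}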

Edit - User prompt

Upon application load, a sheet is presented that iterates over all the model cases. I hope this is helpful enough:

public var selectedModelName = String()
struct ContentView: View {
    @State private var isPresented: Bool = false
    var model = MLNameModels.allCases
    var body: some View {
        ZStack(alignment: .top) {
            if selectedModelName.isEmpty {
                CameraView().edgesIgnoringSafeArea(.all)
            }
            VStack(alignment: .leading) {
                Spacer()
                HStack {
                    Button {
                        isPresented = true
                        print("Tapped")
                    } label: {
                        Image(systemName: "square.and.arrow.up")
                            .resizable()
                            .frame(width: 24, height: 30)
                            .padding()
                            .background(Color.secondary.clipShape(Circle()))
                    }
                    Spacer()
                }
            }.padding()
        }
        .slideOverCard(isPresented: $isPresented) {
            VStack {
                ForEach(model, id: \.self) { modelName in
                    Text(modelName.rawValue)
                        .onTapGesture {
                            selectedModelName = modelName.rawValue
                            isPresented = false
                        }
                }
            }
            .frame(width: UIScreen.main.bounds.width * 0.85)
        }
        .onAppear {
            if selectedModelName.isEmpty {
                isPresented = true
            }
        }
    }
}

And as a result, I then declare:

var objectDetector = Object_Detector(modelWithName: selectedModelName)

But it only seems to load the other model, and I cannot switch models at all.

Here's my CameraViewController:

class CameraViewController : UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    var bufferSize: CGSize = .zero
    var rootLayer: CALayer! = nil
    
    private var detectionOverlay: CALayer! = nil
    
    private let session = AVCaptureSession()
    private let videoDataOutput = AVCaptureVideoDataOutput()
    private let videoOutputQueue = DispatchQueue(label: "Video_Output")
    
    private var previewLayer: AVCaptureVideoPreviewLayer! = nil
    
    // Initializing Model
    var objectDetector = Object_Detector(modelWithName: selectedModelName)
    
    override func viewDidLoad() {
        super.viewDidLoad()
        
        loadCamera()
        setupLayers()
        updateLayerGeometry()
        
        self.session.startRunning()
    }
    
    func loadCamera() {
        
        guard let videoDevice = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: .video, position: .back).devices.first else { return }
        
        guard let videoDeviceInput = try? AVCaptureDeviceInput(device: videoDevice) else {
            print("NO CAMERA DETECTED")
            return
        }
        
        // Begin session config
        self.session.beginConfiguration()
        self.session.sessionPreset = .hd1920x1080
        
        guard self.session.canAddInput(videoDeviceInput) else {
            NSLog("Could not add video device input to the session")
            self.session.commitConfiguration()
            return
        }
        // Add video input
        self.session.addInput(videoDeviceInput)
        
        if session.canAddOutput(self.videoDataOutput) {
            // Add a video data output
            self.session.addOutput(videoDataOutput)
            videoDataOutput.alwaysDiscardsLateVideoFrames = true
            videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)]
            videoDataOutput.setSampleBufferDelegate(self, queue: self.videoOutputQueue)
        } else {
            NSLog("Could not add video data output to the session")
            session.commitConfiguration()
            return
        }
        
        guard let captureConnection = videoDataOutput.connection(with: .video) else { return }
        
        // Always process the frames
        captureConnection.isEnabled = true
        
        do {
            try videoDevice.lockForConfiguration()
            
            let dimensions = CMVideoFormatDescriptionGetDimensions((videoDevice.activeFormat.formatDescription))
            // Read frame dimensions
            self.bufferSize.width = CGFloat(dimensions.width)
            self.bufferSize.height = CGFloat(dimensions.height)
            
            videoDevice.unlockForConfiguration()
        } catch {
            NSLog(error.localizedDescription)
        }
        
        // Save session config
        session.commitConfiguration()
        
        previewLayer = AVCaptureVideoPreviewLayer(session: session)
        previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
        rootLayer = view.layer
        previewLayer.frame = rootLayer.bounds
        rootLayer.addSublayer(previewLayer)
    }
    
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Get buffer with image data
        guard let buffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            NSLog("Error: Failed to get image buffer->\(#line)")
            
            return
        }
        
        // Get device orientation
        let deviceOrientation = self.exifOrientationFromDeviceOrientation()
        
        // Create an image request handler
        let requestHandler = VNImageRequestHandler(cvPixelBuffer: buffer, orientation: deviceOrientation, options: [:])
        
        do {
            try requestHandler.perform(self.objectDetector.requests)
            
            // Adding bounding box
            let boundingBox = self.objectDetector.boundingBox
            let objectType = self.objectDetector.objectType
        
            if !boundingBox.isEmpty && objectType != nil {
                DispatchQueue.main.async {
                    CATransaction.begin()
                    CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
                    self.detectionOverlay.sublayers = nil
                    
                    let objectBounds = VNImageRectForNormalizedRect(boundingBox, Int(self.bufferSize.width), Int(self.bufferSize.height))
                    
                    let shapeLayer = self.createBoundingBox(withBounds: objectBounds)
                    
                    let textLayer = self.createTextBox(withBounds: objectBounds)
                    
                    shapeLayer.addSublayer(textLayer)
                    self.detectionOverlay.addSublayer(shapeLayer)
                    
                    self.updateLayerGeometry()
                    CATransaction.commit()
                }
            }
        } catch {
            NSLog("Error: Unable to perform requests")
        }
    }
    
    func createBoundingBox(withBounds bounds: CGRect) -> CALayer {
        let shapeLayer = CALayer()
        let borderColor = self.objectDetector.objectType?.getColor()
        
        shapeLayer.bounds = bounds
        shapeLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
        shapeLayer.name = "Found Object"
        shapeLayer.borderColor = borderColor
        shapeLayer.borderWidth = 2.5
        shapeLayer.cornerRadius = 5.0
        
        return shapeLayer
    }
    
    func createTextBox(withBounds bounds: CGRect) -> CATextLayer {
        let textLayer = CATextLayer()
        textLayer.name = "Object Label"
        let formattedString = NSMutableAttributedString(string: self.objectDetector.firstObservation.labels[0].identifier)
        let backgroundColor = UIColor(cgColor: self.objectDetector.objectType!.getColor())
        let largeFont = UIFont(name: "AvenirNext-Medium", size: 40.0)!
        
        formattedString.addAttributes([NSAttributedString.Key.font: largeFont, NSAttributedString.Key.foregroundColor: UIColor.white, NSAttributedString.Key.backgroundColor: backgroundColor], range: NSRange(location: 0, length: self.objectDetector.firstObservation.labels[0].identifier.count))
        
        textLayer.string = formattedString
        textLayer.bounds = CGRect(x: 0, y: 0, width: bounds.size.height, height: 50)
        textLayer.position = CGPoint(x: bounds.minX - 25, y: bounds.maxY)
        textLayer.contentsScale = 2.0
        textLayer.cornerRadius = 5.0
        
        textLayer.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: 1.0, y: -1.0))
        
        return textLayer
    }
    
    func setupLayers() {
        detectionOverlay = CALayer()
        detectionOverlay.name = "DetectionOverlay"
        detectionOverlay.bounds = CGRect(x: 0.0, y: 0.0, width: bufferSize.width, height: bufferSize.height)
        detectionOverlay.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
        rootLayer.addSublayer(detectionOverlay)
    }
    
    func updateLayerGeometry() {
        let bounds = rootLayer.bounds
        var scale: CGFloat
        
        let xScale: CGFloat = bounds.size.width / bufferSize.height
        let yScale: CGFloat = bounds.size.height / bufferSize.width
        
        scale = fmax(xScale, yScale)
        if scale.isInfinite {
            scale = 1.0
        }
        
        CATransaction.begin()
        CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
        
        // Rotate the layer into screen orientation and scale and mirror
        detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: scale, y: -scale))
        
        // Center the layer
        detectionOverlay.position = CGPoint(x: bounds.midX, y: bounds.midY)
        
        CATransaction.commit()
    }
    
    // Specify device orientation
    private func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
        let curDeviceOrientation = UIDevice.current.orientation
        let exifOrientation: CGImagePropertyOrientation
        
        switch curDeviceOrientation {
        case UIDeviceOrientation.portraitUpsideDown:  // Device oriented vertically, home button on the top
            exifOrientation = .left
        case UIDeviceOrientation.landscapeLeft:       // Device oriented horizontally, home button on the right
            exifOrientation = .upMirrored
        case UIDeviceOrientation.landscapeRight:      // Device oriented horizontally, home button on the left
            exifOrientation = .down
        case UIDeviceOrientation.portrait:            // Device oriented vertically, home button on the bottom
            exifOrientation = .up
        default:
            exifOrientation = .up
        }
        return exifOrientation
    }
}

Solution

  • As you mentioned in your question/comments, you're currently just taking the initial value and setting up your model based on it, never responding to or listening for any changes later on.

    I couldn't recreate all of your code, since there are a lot of missing types, etc., but the following should give you an idea of how to propagate state changes through the views and objects. See the inline comments.

    
    enum MLNameModels: String, CaseIterable { //simplified just for the example
        case audi = "Audi"
        case bmw = "BMW"
    }
    
    struct ContentView : View {
        @State var model : String //@State, so that the view knows to update
        
        var body: some View {
            VStack {
                CameraView(modelName: model) //note that it gets passed to CameraView here
                VStack {
                    ForEach(MLNameModels.allCases, id: \.self) { modelName in
                        Text(modelName.rawValue)
                            .onTapGesture {
                                model = modelName.rawValue
                            }
                    }
                }
            }
        }
    }
    
    struct CameraView : UIViewControllerRepresentable {
        var modelName : String //gets updated when the parent state changes
        
        func makeUIViewController(context: Context) -> CameraViewController {
            return CameraViewController(modelName: modelName) //initial value
        }
        
        func updateUIViewController(_ uiViewController: CameraViewController, context: Context) {
            uiViewController.modelName = modelName //gets called when modelName changes or the parent re-renders
        }
    }
    
    class CameraViewController : UIViewController {
        var objectDetector : Object_Detector
        
        var modelName : String = "" {
            didSet {
                objectDetector.modelName = modelName //update modelName on the objectDetector when a new modelName is passed through
            }
        }
        
        init(modelName: String) {
            self.objectDetector = Object_Detector(modelWithName: modelName)
            super.init(nibName: nil, bundle: nil)
        }
        
        required init?(coder: NSCoder) {
            fatalError("init(coder:) has not been implemented")
        }
    }
    
    class Object_Detector {
        var modelName : String = "" {
            didSet {
                self.setupModel(withFilename: modelName) //call setupModel when there is a new modelName
            }
        }
        
        init(modelWithName modelName: String) {
            self.modelName = modelName
            self.setupModel(withFilename: modelName) //didSet is not triggered during init, so run the setup explicitly once here
        }
        
        private func setupModel(withFilename modelName: String) {
            //do setup
        }
    }
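
    The setupModel(withFilename:) stub is where your existing Vision setup goes. As a sketch, it could reuse the full enum from the question (the simplified enum above omits the mlModel property), assuming the strings passed around are the enum's raw values such as "Audi"/"BMW":

    private func setupModel(withFilename modelName: String) {
        // Resolve the name back to an enum case and build the Vision model from it
        guard let selected = MLNameModels(rawValue: modelName),
              let visionModel = try? VNCoreMLModel(for: selected.mlModel) else {
            NSLog("Error: no Core ML model for \(modelName)")
            return
        }
        
        let objectRecognizerRequests = VNCoreMLRequest(model: visionModel) { request, error in
            // handle the [VNRecognizedObjectObservation] results exactly as in your original class
        }
        
        // `requests` as declared in your original Object_Detector
        self.requests = [objectRecognizerRequests]
    }

    With that wiring, updateUIViewController triggers didSet on the controller, which triggers didSet on the detector, which re-runs setupModel, so the Vision request is rebuilt whenever the user picks a different manufacturer.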