I've tried almost everything: @EnvironmentObject, @StateObject, global variables, etc.
I have an enum to track and return the names of the models I have:
enum MLNameModels: String, CaseIterable {
case audi = "Audi"
case bmw = "BMW"
var mlModel: MLModel {
switch self {
case .audi:
return try! Audi(configuration: MLModelConfiguration()).model
case .bmw:
return try! BMW(configuration: MLModelConfiguration()).model
}
}
}
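Side note: since the enum already returns an MLModel, I could presumably also wrap it for Vision directly instead of looking the compiled model up by filename, something like:
import Vision

extension MLNameModels {
    // Sketch only: wraps the enum's MLModel for use with a VNCoreMLRequest,
    // so no .mlmodelc lookup by name would be needed.
    var visionModel: VNCoreMLModel? {
        try? VNCoreMLModel(for: mlModel)
    }
}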
I am trying to use live detection when looking around, and I have a CameraView that handles all the setup of the view and even the Vision framework.
struct CameraView : UIViewControllerRepresentable {
func makeUIViewController(context: UIViewControllerRepresentableContext<CameraView>) -> UIViewController {
let controller = CameraViewController()
return controller
}
func updateUIViewController(_ uiViewController: CameraView.UIViewControllerType, context: UIViewControllerRepresentableContext<CameraView>) { }
}
There's one line in my CameraViewController that sets up the object detection:
var objectDetector = Object_Detector(modelWithName: "audi")
I know it says Audi, but that's because I just reverted back to what was working.
Inside the object detector class is the following:
class Object_Detector {
// MARK: Properties
var requests = [VNRequest]()
var boundingBox = CGRect()
var objectType: ObservationTypeEnum?
var firstObservation = VNRecognizedObjectObservation()
init(modelWithName modelName: String) {
self.setupModel(withFilename: modelName)
}
// MARK: Methods
private func setupModel(withFilename modelName: String) {
// Get model URL
guard let modelURL = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") else {
NSLog("Error: Unable to find model with name\(modelName), in \(Bundle.main.bundlePath)")
return
}
// Create desired model
guard let model = try? VNCoreMLModel(for: MLModel(contentsOf: modelURL)) else {
NSLog("Error: Failed to create model->line:\(#line)")
return
}
// Perform a request using ML Model
let objectRecognizerRequests = VNCoreMLRequest(model: model) { (request, err) in
if let error = err {
NSLog("Error: \(error.localizedDescription)")
return
}
// Get observation results
guard let results = request.results as? [VNRecognizedObjectObservation] else {
NSLog("Error: Failed to extract request results as [VNRecognizedObjectObservation]")
return
}
// Get first observation result (one with the greatest confidence)
guard let firstResult = results.first else { return }
self.firstObservation = firstResult
self.objectType = ObservationTypeEnum(fromRawValue: firstResult.labels.first!.identifier)
self.boundingBox = firstResult.boundingBox
}
// Save requests
self.requests = [objectRecognizerRequests]
}
}
I have tested to see if changes occur, and I even call the setupModel()
function after selecting an option, which does indicate that the modelName parameter has been updated. Yet for some reason my app seems to just instantly go to the last model, in this case BMW.
I first prompt users to select a manufacturer, but after that nothing seems to work. It only works when I hard-code the value, as shown above.
To clarify, I don't want to merge results etc. I simply want the app to know which manufacturer the user has selected, then grab the corresponding model and proceed with recognising.
Edit - User prompt
Upon application loading, a sheet is presented iterating over all the model cases. I hope it's helpful enough:
public var selectedModelName = String()
struct ContentView: View {
@State private var isPresented: Bool = false
var model = MLNameModels.allCases
var body: some View {
ZStack(alignment: .top) {
if selectedModelName.isEmpty {
CameraView().edgesIgnoringSafeArea(.all)
}
VStack(alignment: .leading){
Spacer()
HStack {
Button {
isPresented = true
print("Tapped")
} label: {
Image(systemName: "square.and.arrow.up")
.resizable()
.frame(width: 24, height: 30)
.padding()
.background(Color.secondary.clipShape(Circle()))
}
Spacer()
}
}.padding()
}
.slideOverCard(isPresented: $isPresented) {
VStack {
ForEach(model, id: \.self) { modelName in
Text(modelName.rawValue)
.onTapGesture {
selectedModelName = modelName.rawValue
isPresented = false
}
}
}
.frame(width: UIScreen.main.bounds.width * 0.85)
}
.onAppear {
if selectedModelName.isEmpty {
isPresented = true
}
}
}
}
And as a result, I then declare:
var objectDetector = Object_Detector(modelWithName: selectedModelName)
But it only seems to load the other model, and I cannot switch the model at all.
Here's my CameraViewController:
class CameraViewController : UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
var bufferSize: CGSize = .zero
var rootLayer: CALayer! = nil
private var detectionOverlay: CALayer! = nil
private let session = AVCaptureSession()
private let videoDataOutput = AVCaptureVideoDataOutput()
private let videoOutputQueue = DispatchQueue(label: "Video_Output")
private var previewLayer: AVCaptureVideoPreviewLayer! = nil
// Initializing Model
var objectDetector = Object_Detector(modelWithName: selectedModelName)
override func viewDidLoad() {
super.viewDidLoad()
loadCamera()
setupLayers()
updateLayerGeometry()
self.session.startRunning()
}
func loadCamera() {
guard let videoDevice = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: .video, position: .back).devices.first else { return }
guard let videoDeviceInput = try? AVCaptureDeviceInput(device: videoDevice) else {
print("NO CAMERA DETECTED")
return
}
// Begin session config
self.session.beginConfiguration()
self.session.sessionPreset = .hd1920x1080
guard self.session.canAddInput(videoDeviceInput) else {
NSLog("Could not add video device input to the session")
self.session.commitConfiguration()
return
}
// Add video input
self.session.addInput(videoDeviceInput)
if session.canAddOutput(self.videoDataOutput) {
// Add a video data output
self.session.addOutput(videoDataOutput)
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)]
videoDataOutput.setSampleBufferDelegate(self, queue: self.videoOutputQueue)
} else {
NSLog("Could not add video data output to the session")
session.commitConfiguration()
return
}
guard let captureConnection = videoDataOutput.connection(with: .video) else { return }
// Always process the frames
captureConnection.isEnabled = true
do {
try videoDevice.lockForConfiguration()
let dimensions = CMVideoFormatDescriptionGetDimensions((videoDevice.activeFormat.formatDescription))
// Read frame dimensions
self.bufferSize.width = CGFloat(dimensions.width)
self.bufferSize.height = CGFloat(dimensions.height)
videoDevice.unlockForConfiguration()
} catch {
NSLog(error.localizedDescription)
}
// Save session config
session.commitConfiguration()
previewLayer = AVCaptureVideoPreviewLayer(session: session)
previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
rootLayer = view.layer
previewLayer.frame = rootLayer.bounds
rootLayer.addSublayer(previewLayer)
}
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Get buffer with image data
guard let buffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
NSLog("Error: Failed to get image buffer->\(#line)")
return
}
// Get device orientation
let deviceOrientation = self.exifOrientationFromDeviceOrientation()
// Create an image request handler
let requestHandler = VNImageRequestHandler(cvPixelBuffer: buffer, orientation: deviceOrientation, options: [:])
do {
try requestHandler.perform(self.objectDetector.requests)
// Adding bounding box
let boundingBox = self.objectDetector.boundingBox
let objectType = self.objectDetector.objectType
if !boundingBox.isEmpty && objectType != nil {
DispatchQueue.main.async {
CATransaction.begin()
CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
self.detectionOverlay.sublayers = nil
let objectBounds = VNImageRectForNormalizedRect(boundingBox, Int(self.bufferSize.width), Int(self.bufferSize.height))
let shapeLayer = self.createBoundingBox(withBounds: objectBounds)
let textLayer = self.createTextBox(withBounds: objectBounds)
shapeLayer.addSublayer(textLayer)
self.detectionOverlay.addSublayer(shapeLayer)
self.updateLayerGeometry()
CATransaction.commit()
}
}
} catch {
NSLog("Error: Unable to perform requests")
}
}
func createBoundingBox(withBounds bounds: CGRect) -> CALayer {
let shapeLayer = CALayer()
let borderColor = self.objectDetector.objectType?.getColor()
shapeLayer.bounds = bounds
shapeLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
shapeLayer.name = "Found Object"
shapeLayer.borderColor = borderColor
shapeLayer.borderWidth = 2.5
shapeLayer.cornerRadius = 5.0
return shapeLayer
}
func createTextBox(withBounds bounds: CGRect) -> CATextLayer {
let textLayer = CATextLayer()
textLayer.name = "Object Label"
let formattedString = NSMutableAttributedString(string: String(format: "\(self.objectDetector.firstObservation.labels[0].identifier)"))
let backgroundColor = UIColor(cgColor: self.objectDetector.objectType!.getColor())
let largeFont = UIFont(name: "AvenirNext-Medium", size: 40.0)!
formattedString.addAttributes([NSAttributedString.Key.font: largeFont, NSAttributedString.Key.foregroundColor: UIColor.white, NSAttributedString.Key.backgroundColor: backgroundColor], range: NSRange(location: 0, length: self.objectDetector.firstObservation.labels[0].identifier.count))
textLayer.string = formattedString
textLayer.bounds = CGRect(x: 0, y: 0, width: bounds.size.height, height: 50)
textLayer.position = CGPoint(x: bounds.minX - 25, y: bounds.maxY)
textLayer.contentsScale = 2.0
textLayer.cornerRadius = 5.0
textLayer.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: 1.0, y: -1.0))
return textLayer
}
func setupLayers() {
detectionOverlay = CALayer()
detectionOverlay.name = "DetectionOverlay"
detectionOverlay.bounds = CGRect(x: 0.0, y: 0.0, width: bufferSize.width, height: bufferSize.height)
detectionOverlay.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
rootLayer.addSublayer(detectionOverlay)
}
func updateLayerGeometry() {
let bounds = rootLayer.bounds
var scale: CGFloat
let xScale: CGFloat = bounds.size.width / bufferSize.height
let yScale: CGFloat = bounds.size.height / bufferSize.width
scale = fmax(xScale, yScale)
if scale.isInfinite {
scale = 1.0
}
CATransaction.begin()
CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
// Rotate the layer into screen orientation and scale and mirror
detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: scale, y: -scale))
// Center the layer
detectionOverlay.position = CGPoint(x: bounds.midX, y: bounds.midY)
CATransaction.commit()
}
// Specify device orientation
private func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
let curDeviceOrientation = UIDevice.current.orientation
let exifOrientation: CGImagePropertyOrientation
switch curDeviceOrientation {
case UIDeviceOrientation.portraitUpsideDown: // Device oriented vertically, home button on the top
exifOrientation = .left
case UIDeviceOrientation.landscapeLeft: // Device oriented horizontally, home button on the right
exifOrientation = .upMirrored
case UIDeviceOrientation.landscapeRight: // Device oriented horizontally, home button on the left
exifOrientation = .down
case UIDeviceOrientation.portrait: // Device oriented vertically, home button on the bottom
exifOrientation = .up
default:
exifOrientation = .up
}
return exifOrientation
}
}
As you mentioned in your question/comments, you're currently just taking the initial value and setting up your model based on it, never responding to or listening for any changes later on.
I couldn't recreate all of your code, since there are a lot of missing types, etc., but the following should give you an idea of how to propagate state changes through the views and objects. See the inline comments.
enum MLNameModels: String, CaseIterable { //simplified just for the example
case audi = "Audi"
case bmw = "BMW"
}
struct ContentView : View {
@State var model : String //@State, so that the view knows to update
var body: some View {
VStack {
CameraView(modelName: model) //note that it gets passed to CameraView here
VStack {
ForEach(MLNameModels.allCases, id: \.self) { modelName in
Text(modelName.rawValue)
.onTapGesture {
model = modelName.rawValue
}
}
}
}
}
}
struct CameraView : UIViewControllerRepresentable {
var modelName : String //gets updated when the parent state changes
func makeUIViewController(context: Context) -> CameraViewController {
return CameraViewController(modelName: modelName) //initial value
}
func updateUIViewController(_ uiViewController: CameraViewController, context: Context) {
uiViewController.modelName = modelName //gets called when modelName changes or the parent re-renders
}
}
class CameraViewController : UIViewController {
var objectDetector : Object_Detector
var modelName : String = "" {
didSet {
objectDetector.modelName = modelName //update modelName on the objectDetector when a new modelName is passed through
}
}
init(modelName: String) {
self.objectDetector = Object_Detector(modelWithName: modelName)
super.init(nibName: nil, bundle: nil)
}
required init?(coder: NSCoder) {
fatalError("init(coder:) has not been implemented")
}
}
class Object_Detector {
var modelName : String = "" {
didSet {
self.setupModel(withFilename: modelName) //call setupModel when there is a new modelName
}
}
init(modelWithName modelName: String) {
self.modelName = modelName
self.setupModel(withFilename: modelName) //didSet doesn't fire during init, so run the setup explicitly here
}
private func setupModel(withFilename modelName: String) {
//do setup
}
}
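For completeness, a rough usage sketch: because model has no default value, whatever creates ContentView has to supply the initial selection (the App type name below is just illustrative), and the plain VStack above would be replaced by your sheet/slideOverCard selection UI:
import SwiftUI

@main
struct CarDetectorApp: App { // illustrative name only
    var body: some Scene {
        WindowGroup {
            // Seed the view with an initial model; tapping a name in the list updates it from there.
            ContentView(model: MLNameModels.audi.rawValue)
        }
    }
}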