ios · swift · avfoundation

How do you create a new AVAsset video that consists of only frames from given `CMTimeRange`s of another video?


Apple's sample code Identifying Trajectories in Video contains the following delegate callback:

func cameraViewController(_ controller: CameraViewController, didReceiveBuffer buffer: CMSampleBuffer, orientation: CGImagePropertyOrientation) {
    let visionHandler = VNImageRequestHandler(cmSampleBuffer: buffer, orientation: orientation, options: [:])
    
    if gameManager.stateMachine.currentState is GameManager.TrackThrowsState {
        DispatchQueue.main.async {
            // Get the frame of rendered view
            let normalizedFrame = CGRect(x: 0, y: 0, width: 1, height: 1)
            self.jointSegmentView.frame = controller.viewRectForVisionRect(normalizedFrame)
            self.trajectoryView.frame = controller.viewRectForVisionRect(normalizedFrame)
        }
        // Perform the trajectory request in a separate dispatch queue.
        trajectoryQueue.async {
            do {
                try visionHandler.perform([self.detectTrajectoryRequest])
                if let results = self.detectTrajectoryRequest.results {
                    DispatchQueue.main.async {
                        self.processTrajectoryObservations(controller, results)
                    }
                }
            } catch {
                AppError.display(error, inViewController: self)
            }
        }
    } 
}

However, instead of drawing UI whenever detectTrajectoryRequest.results exist (https://developer.apple.com/documentation/vision/vndetecttrajectoriesrequest/3675672-results), I'm interested in using the CMTimeRange provided by each result to construct a new video. In effect, this would filter down the original video to only frames with trajectories.

What would be a good approach to transferring only frames with trajectories from an AVAssetReader to an AVAssetWriter?


Solution

  • By the time you identify a trajectory in captured video frames, or in frames decoded from a file, you may no longer have the earlier frames in memory. The easiest way to create your file containing only trajectories is therefore to keep the original file on hand, and then insert its trajectory snippets into an AVComposition which you then export using AVAssetExportSession.

    This sample captures frames from the camera and encodes them to a file whilst analysing them for trajectories; after 20 seconds it closes the file and then creates a new file containing only the trajectory snippets.

    If you're interested in detecting trajectories in a pre-existing file instead, it's not too hard to rewire this code; there is a rough AVAssetReader sketch after the listing below.

    import UIKit
    import AVFoundation
    import Vision
    
    class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
        let session = AVCaptureSession()
        
        var assetWriter: AVAssetWriter!
        var assetWriterInput: AVAssetWriterInput!
        var assetWriterStartTime: CMTime = .zero
        var assetWriterStarted = false
    
        var referenceFileURL: URL!
        var timeRangesOfInterest: [Double : CMTimeRange] = [:]
    
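        // Create an asset writer that encodes the incoming sample buffers to an H.264 .mov file,
        // starting its writing session at the presentation time of the first buffer.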
        func startWritingFile(outputURL: URL, initialSampleBuffer: CMSampleBuffer) {
            try? FileManager.default.removeItem(at: outputURL)
            assetWriter = try! AVAssetWriter(outputURL: outputURL, fileType: .mov)
    
            let dimensions = initialSampleBuffer.formatDescription!.dimensions
            assetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: [AVVideoCodecKey: AVVideoCodecType.h264, AVVideoWidthKey: dimensions.width, AVVideoHeightKey: dimensions.height])
            // The camera delivers frames in real time, so don't let this input hold up the capture callback.
            assetWriterInput.expectsMediaDataInRealTime = true
            
            assetWriter.add(assetWriterInput)
    
            assetWriter.startWriting()
            
            self.assetWriterStartTime = CMSampleBufferGetPresentationTimeStamp(initialSampleBuffer)
            assetWriter.startSession(atSourceTime: self.assetWriterStartTime)
        }
        
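        // Close the reference movie so its contents can be re-read when building the trajectory composition.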
        func stopWritingFile(completion: @escaping (() -> Void)) {
            let assetWriterToFinish = self.assetWriter!
            self.assetWriterInput = nil
            self.assetWriter = nil
                    
            assetWriterToFinish.finishWriting {
                print("finished writing: \(assetWriterToFinish.status.rawValue)")
                completion()
            }
        }
        
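        // Copy only the given time ranges of the input movie into an AVMutableComposition
        // and export the result with AVAssetExportSession.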
        func exportVideoTimeRanges(inputFileURL: URL, outputFileURL: URL, timeRanges: [CMTimeRange]) {
            let inputAsset = AVURLAsset(url: inputFileURL)
            let inputVideoTrack = inputAsset.tracks(withMediaType: .video).first!
            
            let composition = AVMutableComposition()
            
            let compositionTrack = composition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid)!
            
            var insertionPoint: CMTime = .zero
            for timeRange in timeRanges {
                try! compositionTrack.insertTimeRange(timeRange, of: inputVideoTrack, at: insertionPoint)
                insertionPoint = insertionPoint + timeRange.duration
            }
            
            let exportSession = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality)!
            try? FileManager.default.removeItem(at: outputFileURL)
            exportSession.outputURL = outputFileURL
            exportSession.outputFileType = .mov
            exportSession.exportAsynchronously {
                print("export finished: \(exportSession.status.rawValue) - \(String(describing: exportSession.error))")
            }
        }
        
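        // Set up the capture session, then stop recording after 20 seconds and export the trajectory snippets.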
        override func viewDidLoad() {
            super.viewDidLoad()
                    
            let inputDevice = AVCaptureDevice.default(for: .video)!
            let input = try! AVCaptureDeviceInput(device: inputDevice)
            let output = AVCaptureVideoDataOutput()
            
            output.setSampleBufferDelegate(self, queue: DispatchQueue.main)
            
            session.addInput(input)
            session.addOutput(output)
            
            session.startRunning()
            
            DispatchQueue.main.asyncAfter(deadline: .now() + 20) {
                self.stopWritingFile {
                    print("finished writing")
                    
                    let trajectoriesFileURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
                        .appendingPathComponent("trajectories.mov")
    
                    // Dictionaries are unordered, so sort the snippets chronologically before building the composition.
                    self.exportVideoTimeRanges(inputFileURL: self.referenceFileURL,
                                               outputFileURL: trajectoriesFileURL,
                                               timeRanges: self.timeRangesOfInterest.sorted { $0.key < $1.key }.map { $0.value })
                }
            }
        }
        
        // Lazily create a single instance of VNDetectTrajectoriesRequest.
        private lazy var request: VNDetectTrajectoriesRequest = {
            return VNDetectTrajectoriesRequest(frameAnalysisSpacing: .zero,
                                               trajectoryLength: 10,
                                               completionHandler: completionHandler)
        }()
        
        // AVCaptureVideoDataOutputSampleBufferDelegate callback.
        func captureOutput(_ output: AVCaptureOutput,
                           didOutput sampleBuffer: CMSampleBuffer,
                           from connection: AVCaptureConnection) {
            if !assetWriterStarted {
                self.referenceFileURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
                    .appendingPathComponent("reference.mov")
    
                startWritingFile(outputURL: self.referenceFileURL, initialSampleBuffer: sampleBuffer)
                assetWriterStarted = true
            }
            
            if assetWriterInput != nil && assetWriterInput.isReadyForMoreMediaData {
                assetWriterInput.append(sampleBuffer)
            }
            
            do {
                let requestHandler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer)
                try requestHandler.perform([request])
            } catch {
                // Handle the error.
            }
        }
        
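        // Record the time range of each trajectory observation, expressed relative to the
        // start of the reference movie file.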
        func completionHandler(request: VNRequest, error: Error?) {
            guard let request = request as? VNDetectTrajectoriesRequest else { return }
    
            if let results = request.results,
               results.count > 0 {
                NSLog("\(results)")
                for result in results {
                    var fileRelativeTimeRange = result.timeRange
                    fileRelativeTimeRange.start = fileRelativeTimeRange.start - self.assetWriterStartTime
                    self.timeRangesOfInterest[fileRelativeTimeRange.start.seconds] = fileRelativeTimeRange
                }
            }
        }
    }
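
    For the pre-existing-file case, a minimal sketch of the rewiring might look like the following. It decodes the file with an AVAssetReader, runs the same VNDetectTrajectoriesRequest on each decoded frame, and collects the observations' time ranges, which you can then pass to exportVideoTimeRanges(inputFileURL:outputFileURL:timeRanges:) above. The function name analyzeTrajectories(in:) is illustrative rather than part of any API, and the sketch assumes the reader delivers frames in presentation order with timestamps that are already relative to the start of the asset.

    import AVFoundation
    import Vision

    func analyzeTrajectories(in fileURL: URL) throws -> [CMTimeRange] {
        let asset = AVURLAsset(url: fileURL)
        guard let videoTrack = asset.tracks(withMediaType: .video).first else { return [] }

        let reader = try AVAssetReader(asset: asset)
        // Decode to a pixel format Vision can work with directly.
        let output = AVAssetReaderTrackOutput(track: videoTrack,
                                              outputSettings: [kCVPixelBufferPixelFormatTypeKey as String:
                                                               kCVPixelFormatType_420YpCbCr8BiPlanarFullRange])
        reader.add(output)
        guard reader.startReading() else { return [] }

        // Keyed by start time to collapse duplicate observations of the same trajectory.
        var timeRangesOfInterest: [Double: CMTimeRange] = [:]
        let request = VNDetectTrajectoriesRequest(frameAnalysisSpacing: .zero,
                                                  trajectoryLength: 10) { request, _ in
            guard let results = request.results as? [VNTrajectoryObservation] else { return }
            for result in results {
                // Frames decoded from a file carry asset-relative timestamps, so no
                // start-time adjustment is needed here.
                timeRangesOfInterest[result.timeRange.start.seconds] = result.timeRange
            }
        }

        // Feed the decoded frames to Vision in order.
        while let sampleBuffer = output.copyNextSampleBuffer() {
            let handler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer)
            try handler.perform([request])
        }

        return timeRangesOfInterest
            .sorted { $0.key < $1.key }
            .map { $0.value }
    }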