I am using AVFoundation to merge two videos into one. The result of my attempt is a single video whose length equals the sum of all the clips, but which displays only a black screen.
Here is my code:
public void mergeclips()
{
AVMutableComposition mixComposition = new AVMutableComposition();
CMTime previous_asset_duration = CMTime.Zero;
CMTime AllAssetDurations = CMTime.Zero;
AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];
foreach (string clip in Clips)
{
#region HoldVideoTrack
AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));
AVMutableCompositionTrack Track = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
CMTimeRange range = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = asset.Duration
};
AVAssetTrack track = asset.TracksWithMediaType(AVMediaType.Video)[0];
Track.InsertTimeRange(range, track, previous_asset_duration, out NSError error);
#endregion
#region Instructions
// 7
var Instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(Track);
Instruction.SetOpacity(0, asset.Duration);
// 8
Instruction_Array[Clips.IndexOf(clip)] = Instruction;
#endregion
previous_asset_duration = asset.Duration;
AllAssetDurations = asset.Duration;
}
// 6
var mainInstruction = new List<AVMutableVideoCompositionInstruction>();
CMTimeRange rangeIns = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = AllAssetDurations
};
mainInstruction[0].TimeRange = rangeIns;
mainInstruction[0].LayerInstructions = Instruction_Array;
var mainComposition = new AVMutableVideoComposition();
mainComposition.Instructions = mainInstruction.ToArray();
mainComposition.FrameDuration = new CMTime(1, 30);
mainComposition.RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height);
//... export video ...
AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
{
OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
OutputFileType = AVFileType.QuickTimeMovie,
ShouldOptimizeForNetworkUse = true,
//APP crashes here
VideoComposition = mainComposition
};
exportSession.ExportAsynchronously(_OnExportDone);
}
private static void _OnExportDone()
{
var library = new ALAssetsLibrary();
library.WriteVideoToSavedPhotosAlbum(NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")), (path, e2) =>
{
if (e2 != null)
{
new UIAlertView("Error", e2.ToString(), null, "OK", null).Show();
}
else
{
}
});
}
EDIT: I added more code; specifically, I set ShouldOptimizeForNetworkUse and a VideoComposition on the AVAssetExportSession. I am using a List<AVMutableVideoCompositionInstruction> rather than a single instruction because AVMutableVideoComposition.Instructions requires an array of type AVVideoCompositionInstruction[]. With this code the app crashes at the line "VideoComposition = mainComposition".
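For context on that type constraint (this is only a sketch of the assignment, reusing the names from the code above, not a claimed fix for the crash): a single AVMutableVideoCompositionInstruction can be wrapped directly in an AVVideoCompositionInstruction[], since the mutable class derives from AVVideoCompositionInstruction, so the List is not strictly needed:

// Sketch, reusing Instruction_Array and AllAssetDurations from the code above.
var mainInstruction = new AVMutableVideoCompositionInstruction
{
    TimeRange = new CMTimeRange { Start = CMTime.Zero, Duration = AllAssetDurations },
    LayerInstructions = Instruction_Array
};
var mainComposition = new AVMutableVideoComposition
{
    // AVMutableVideoCompositionInstruction derives from AVVideoCompositionInstruction,
    // so it can go straight into the required array.
    Instructions = new AVVideoCompositionInstruction[] { mainInstruction },
    FrameDuration = new CMTime(1, 30),
    RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
};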
EDIT: After adding transforms to the instructions and making the corrections Shawn pointed out, I can merge two or more videos and save the combined video to a file. Unfortunately, the root problem remains: the final video displays only the BackgroundColor of the AVMutableVideoCompositionInstruction, not the clips themselves. The audio of the videos is also ignored; I don't know whether audio has to be added separately, but knowing that might also help.
Here is my code:
public void mergeclips()
{
AVMutableComposition mixComposition = new AVMutableComposition();
AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];
foreach (string clip in Clips)
{
#region HoldVideoTrack
AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));
AVMutableCompositionTrack Track = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
CMTimeRange range = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = asset.Duration
};
AVAssetTrack track = asset.TracksWithMediaType(AVMediaType.Video)[0];
Track.InsertTimeRange(range, track, mixComposition.Duration, out NSError error);
#endregion
#region Instructions
Instruction_Array[Clips.IndexOf(clip)] = SetInstruction(asset, mixComposition.Duration, Track);
#endregion
}
// 6
var mainInstruction = new AVMutableVideoCompositionInstruction();
CMTimeRange rangeIns = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = mixComposition.Duration
};
mainInstruction.BackgroundColor = UIColor.FromRGBA(1f, 1f, 1f, 1.000f).CGColor;
mainInstruction.TimeRange = rangeIns;
mainInstruction.LayerInstructions = Instruction_Array;
var mainComposition = new AVMutableVideoComposition()
{
Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
FrameDuration = new CMTime(1, 30),
RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
};
//... export video ...
AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
{
OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
OutputFileType = AVFileType.QuickTimeMovie,
ShouldOptimizeForNetworkUse = true,
VideoComposition = mainComposition
};
exportSession.ExportAsynchronously(_OnExportDone);
}
private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVMutableCompositionTrack assetTrack)
{
var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(assetTrack);
var transform = assetTrack.PreferredTransform;
var transformSize = assetTrack.NaturalSize; //for export session
var newAssetSize = new CoreGraphics.CGSize(transformSize.Width, transformSize.Height); // for export session
if (newAssetSize.Width > newAssetSize.Height)//portrait
{
//From here on, newAssetSize has its height and width inverted: height should be width and vice versa
var scaleRatio = UIScreen.MainScreen.Bounds.Height / newAssetSize.Width;
var _coreGraphic = new CoreGraphics.CGAffineTransform(0, 0, 0, 0, 0, 0);
_coreGraphic.Scale(scaleRatio, scaleRatio);
var tx = UIScreen.MainScreen.Bounds.Width / 2 - newAssetSize.Height * scaleRatio / 2;
var ty = UIScreen.MainScreen.Bounds.Height / 2 - newAssetSize.Width * scaleRatio / 2;
_coreGraphic.Translate(tx, ty);
instruction.SetTransform(_coreGraphic, currentTime);
}
var endTime = CMTime.Add(currentTime, asset.Duration);
instruction.SetOpacity(0, endTime);
return instruction;
}
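Regarding the audio that is being ignored: video and audio live on separate tracks in an AVMutableComposition, so each asset's audio has to be inserted into its own audio track; inserting only the video track drops the sound. A minimal sketch of what that can look like inside the clip loop (the revised code below does something similar). Note that the insertion point should be captured once, before the video track is inserted, so audio and video stay aligned:

// Sketch only – assumes it runs inside the clip loop, next to the video insertion,
// and that asset, range and mixComposition are the same objects used above.
CMTime insertAt = mixComposition.Duration; // capture BEFORE inserting the video track
var assetAudioTracks = asset.TracksWithMediaType(AVMediaType.Audio);
if (assetAudioTracks.Length > 0) // not every clip necessarily has audio
{
    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
    audioTrack.InsertTimeRange(range, assetAudioTracks[0], insertAt, out NSError audioError);
}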
EDIT: Several mistakes in the code were corrected thanks to Shawn's help. The problem remains: the resulting video has no image.
Here is my code:
public void mergeclips()
{
//microphone
AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);
AVMutableComposition mixComposition = new AVMutableComposition();
AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];
foreach (string clip in Clips)
{
#region HoldVideoTrack
AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));
CMTimeRange range = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = asset.Duration
};
AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
AVAssetTrack assetVideoTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error);
if (microphone != null)
{
AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
audioTrack.InsertTimeRange(range, assetAudioTrack, mixComposition.Duration, out NSError error2);
}
#endregion
#region Instructions
Instruction_Array[Clips.IndexOf(clip)] = SetInstruction(asset, mixComposition.Duration, videoTrack);
#endregion
}
// 6
var mainInstruction = new AVMutableVideoCompositionInstruction();
CMTimeRange rangeIns = new CMTimeRange()
{
Start = new CMTime(0, 0),
Duration = mixComposition.Duration
};
mainInstruction.BackgroundColor = UIColor.FromRGBA(1f, 1f, 1f, 1.000f).CGColor;
mainInstruction.TimeRange = rangeIns;
mainInstruction.LayerInstructions = Instruction_Array;
var mainComposition = new AVMutableVideoComposition()
{
Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
FrameDuration = new CMTime(1, 30),
RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
};
//... export video ...
AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
{
OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
OutputFileType = AVFileType.QuickTimeMovie,
ShouldOptimizeForNetworkUse = true,
VideoComposition = mainComposition
};
exportSession.ExportAsynchronously(_OnExportDone);
}
private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVMutableCompositionTrack mixComposition_video_Track)
{
//The following code triggers when a device has no camera or no microphone (for instance an emulator)
var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(mixComposition_video_Track);
//Get the individual AVAsset's track to use for transform
AVAssetTrack assetTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
//Set transform to the PreferredTransform of the AVAssetTrack, not the AVMutableCompositionTrack
CGAffineTransform transform = assetTrack.PreferredTransform;
//Set the transformSize to be the asset natural size AFTER applying preferredTransform.
CGSize transformSize = transform.TransformSize(assetTrack.NaturalSize);
//Handle any negative values resulted from applying transform by using the absolute value
CGSize newAssetSize = new CoreGraphics.CGSize(Math.Abs(transformSize.Width), Math.Abs(transformSize.Height));
//change back to less than
if (newAssetSize.Width < newAssetSize.Height)//portrait
{
/*newAssetSize should no longer be inverted since preferredTransform handles this. Remember that the asset was never
* actually transformed yet. newAssetSize just represents the size the video is going to be after you call
* instruction.setTransform(transform). Since transform is the first transform in concatenation, this is the size that
* the scale and translate transforms will be using, which is why we needed to reference newAssetSize after applying
* transform. Also you should concatenate in this order: transform -> scale -> translate, otherwise you won't get
* desired results*/
nfloat scaleRatio = UIScreen.MainScreen.Bounds.Height / newAssetSize.Height;
//Apply scale to transform. Transform is never actually applied unless you do this.
transform.Scale(scaleRatio, scaleRatio);
nfloat tx = UIScreen.MainScreen.Bounds.Width / 2 - newAssetSize.Width * scaleRatio / 2;
nfloat ty = UIScreen.MainScreen.Bounds.Height / 2 - newAssetSize.Height * scaleRatio / 2;
transform.Translate(tx, ty);
instruction.SetTransform(transform, currentTime);
}
var endTime = CMTime.Add(currentTime, asset.Duration);
instruction.SetOpacity(0, endTime);
return instruction;
}
OK, so thanks to Shawn's help I have accomplished what I was trying to do.
There were two main mistakes in my code that caused this problem. The first was how the Start property of the CMTimeRange given to the video track was set: Start = new CMTime(0, 0) instead of Start = CMTime.Zero. I still don't know what difference it makes, but it prevented the code from displaying the video and the audio of each asset, leaving a video with the length of all the clips combined and only the background of the AVMutableVideoCompositionInstruction.
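My best guess at the difference, based on how CMTime is defined rather than on anything Shawn wrote: new CMTime(0, 0) is a value of 0 over a timescale of 0, which is not a usable time, while CMTime.Zero is a value of 0 over a timescale of 1, i.e. a genuine zero. A time range that starts at an unusable time would explain tracks that never show up. A tiny illustration:

// Illustration of the assumption above – the two values are not equivalent.
CMTime a = new CMTime(0, 0); // value 0, timescale 0: seconds = 0 / 0
CMTime b = CMTime.Zero;      // value 0, timescale 1: a real zero
Console.WriteLine(a.Seconds); // NaN
Console.WriteLine(b.Seconds); // 0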
The second mistake was how I set up the instructions; the configuration that worked for me can be found in the code below.
Here is the final function, working correctly:
public void MergeClips()
{
//microphone
AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);
AVMutableComposition mixComposition = AVMutableComposition.Create();
AVVideoCompositionLayerInstruction[] Instruction_Array = new AVVideoCompositionLayerInstruction[Clips.Count];
foreach (string clip in Clips)
{
var asset = AVUrlAsset.FromUrl(new NSUrl(clip, false)) as AVUrlAsset;
#region HoldVideoTrack
//This range applies to the video, not to the mixComposition
CMTimeRange range = new CMTimeRange()
{
Start = CMTime.Zero,
Duration = asset.Duration
};
var duration = mixComposition.Duration;
NSError error;
AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
AVAssetTrack assetVideoTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
videoTrack.InsertTimeRange(range, assetVideoTrack, duration, out error);
videoTrack.PreferredTransform = assetVideoTrack.PreferredTransform;
if (microphone != null)
{
AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
audioTrack.InsertTimeRange(range, assetAudioTrack, duration, out error);
}
#endregion
#region Instructions
int counter = Clips.IndexOf(clip);
Instruction_Array[counter] = SetInstruction(asset, mixComposition.Duration, videoTrack);
#endregion
}
// 6
AVMutableVideoCompositionInstruction mainInstruction = AVMutableVideoCompositionInstruction.Create() as AVMutableVideoCompositionInstruction;
CMTimeRange rangeIns = new CMTimeRange()
{
Start = CMTime.Zero,
Duration = mixComposition.Duration
};
mainInstruction.TimeRange = rangeIns;
mainInstruction.LayerInstructions = Instruction_Array;
var mainComposition = AVMutableVideoComposition.Create();
mainComposition.Instructions = new AVVideoCompositionInstruction[1] { mainInstruction };
mainComposition.FrameDuration = new CMTime(1, 30);
mainComposition.RenderSize = new CGSize(mixComposition.NaturalSize.Height, mixComposition.NaturalSize.Width);
finalVideo_path = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "Whole2.mov"));
if (File.Exists(Path.GetTempPath() + "Whole2.mov"))
{
File.Delete(Path.GetTempPath() + "Whole2.mov");
}
//... export video ...
AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.HighestQuality)
{
OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "Whole2.mov")),
OutputFileType = AVFileType.QuickTimeMovie,
ShouldOptimizeForNetworkUse = true,
VideoComposition = mainComposition
};
exportSession.ExportAsynchronously(_OnExportDone);
}
private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVAssetTrack mixComposition_video_Track)
{
var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(mixComposition_video_Track);
var startTime = CMTime.Subtract(currentTime, asset.Duration);
//NaturalSize.Height is passed as the width parameter because iOS stores video recordings horizontally
CGAffineTransform translateToCenter = CGAffineTransform.MakeTranslation(mixComposition_video_Track.NaturalSize.Height, 0);
//Angle in radians, not degrees
CGAffineTransform rotate = CGAffineTransform.Rotate(translateToCenter, (nfloat)(Math.PI / 2));
instruction.SetTransform(rotate, (CMTime.Subtract(currentTime, asset.Duration)));
instruction.SetOpacity(1, startTime);
instruction.SetOpacity(0, currentTime);
return instruction;
}
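One small addition of my own, not part of Shawn's fix: ExportAsynchronously only signals that the export finished, not whether it succeeded, so it is worth checking the session's Status and Error in the callback before touching the output file. A minimal sketch, using a lambda that captures exportSession instead of the separate _OnExportDone method:

exportSession.ExportAsynchronously(() =>
{
    if (exportSession.Status == AVAssetExportSessionStatus.Completed)
    {
        // Safe to use the file at OutputUrl here (e.g. save it to the photo library).
    }
    else
    {
        // Failed or Cancelled – exportSession.Error describes what went wrong.
        Console.WriteLine(exportSession.Error?.LocalizedDescription);
    }
});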
As I said, I solved my problem thanks to Shawn's help, and most of this code was translated to C# from his answers, so if you were planning to upvote this answer, please upvote Shawn's instead, or both.