I have around 50 3D LUTs (stored as PNG images, each about 900 KB in size) and use the CIColorCube filter to generate a filtered image. I use a UICollectionView to display a filtered thumbnail (100x100) for each LUT (like in the Photos app). The problem is that UICollectionView scrolling becomes extremely slow (nowhere close to the smoothness of the Photos app) when I generate the filtered images as the user scrolls. I thought of pre-generating the filtered images, but it takes around 150 milliseconds to generate the cubeData from one LUT PNG, so preparing 50 filtered thumbnails takes around 7-8 seconds, which is too long. This cube-data generation is exactly the culprit for the poor scrolling performance as well. I am wondering what I can do to make it as smooth as the Photos app or other photo editing apps. Here is my code to generate cube data from a LUT PNG. I believe the fix is more likely a CoreImage/Metal trick than something based on UIKit/DispatchQueue/NSOperation.
/// Builds the `inputCubeData` payload for `CIColorCube` from a tiled LUT image.
///
/// The image is expected to be a grid of `lutSize` tiles, each `lutSize` x `lutSize`
/// pixels, laid out row by row. Tile index becomes the slowest-varying (z) axis of
/// the cube, the pixel row inside a tile the y axis and the pixel column the
/// fastest-varying (x) axis.
///
/// - Parameters:
///   - image: The LUT image. Its width and height must both be multiples of
///     `lutSize` and it must contain exactly `lutSize` tiles.
///   - lutSize: Edge length of the cube (for example 64).
/// - Returns: `lutSize³ * 4` floats in the range 0...1 wrapped in `Data`, or `nil`
///   when the image has no bitmap representation or its dimensions do not match.
public static func colorCubeDataFromLUTPNGImage(_ image : UIImage, lutSize:Int) -> Data? {
    let size = lutSize
    // A non-positive size would divide by zero below; a missing CGImage means
    // there are no pixels to read.
    guard size > 0, let lutImage = image.cgImage else {
        NSLog("Invalid colorLUT")
        return nil
    }
    let lutWidth = lutImage.width
    let lutHeight = lutImage.height
    let rowCount = lutHeight / size
    let columnCount = lutWidth / size
    guard lutWidth % size == 0, lutHeight % size == 0, rowCount * columnCount == size else {
        NSLog("Invalid colorLUT")
        return nil
    }
    guard let bitmap = getBytesFromImage(image: image) else {
        NSLog("Invalid colorLUT")
        return nil
    }

    // `allocate(capacity:)` counts elements, not bytes.
    let floatCount = size * size * size * 4
    let cubeData = UnsafeMutablePointer<Float>.allocate(capacity: floatCount)

    var z = 0
    var bitmapOffset = 0
    for _ in 0 ..< rowCount {
        for y in 0 ..< size {
            // Every pixel row of the image crosses all tiles of the current tile
            // row, so z advances per tile and is rewound for the next pixel row.
            let firstTileInRow = z
            for _ in 0 ..< columnCount {
                for x in 0 ..< size {
                    let dataOffset = (z * size * size + y * size + x) * 4
                    // The source bytes are written to the cube in reverse order:
                    // bitmap byte 0 lands in slot 3, byte 3 in slot 0. With the
                    // little-endian "alpha last" bitmap produced by
                    // getBytesFromImage this presumably turns A,B,G,R into R,G,B,A.
                    for channel in 0 ..< 4 {
                        cubeData[dataOffset + 3 - channel] = Float(bitmap[bitmapOffset + channel]) / 255.0
                    }
                    bitmapOffset += 4
                }
                z += 1
            }
            z = firstTileInRow
        }
        z += columnCount
    }

    // Memory obtained from `allocate` must be released with `deallocate`, so a
    // custom deallocator is used instead of `.free`.
    return Data(bytesNoCopy: cubeData,
                count: floatCount * MemoryLayout<Float>.stride,
                deallocator: .custom { _, _ in cubeData.deallocate() })
}
/// Renders `image` into a tightly packed 8-bit-per-channel bitmap and returns its bytes.
///
/// The bitmap uses the device RGB colour space with premultiplied "alpha last"
/// pixels in 32-bit little-endian byte order, four bytes per pixel and
/// `width * 4` bytes per row.
///
/// - Parameter image: The image to rasterise.
/// - Returns: `width * height * 4` bytes, or `nil` when `image` is `nil`, has no
///   `cgImage`, or the bitmap context cannot be created.
fileprivate static func getBytesFromImage(image:UIImage?) -> [UInt8]?
{
    guard let imageRef = image?.cgImage else {
        return nil
    }
    let width = imageRef.width
    let height = imageRef.height
    let bitsPerComponent = 8
    let bytesPerRow = width * 4
    let totalBytes = height * bytesPerRow
    let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue | CGBitmapInfo.byteOrder32Little.rawValue
    let colorSpace = CGColorSpaceCreateDeviceRGB()
    var intensities = [UInt8](repeating: 0, count: totalBytes)

    // The context keeps writing through the pointer while drawing, so both the
    // context creation and the draw call must happen while the array's storage
    // is pinned. Passing `&intensities` directly only guarantees the pointer for
    // the duration of the initializer call.
    let didDraw = intensities.withUnsafeMutableBytes { buffer -> Bool in
        guard let contextRef = CGContext(data: buffer.baseAddress,
                                         width: width,
                                         height: height,
                                         bitsPerComponent: bitsPerComponent,
                                         bytesPerRow: bytesPerRow,
                                         space: colorSpace,
                                         bitmapInfo: bitmapInfo) else {
            return false
        }
        contextRef.draw(imageRef, in: CGRect(x: 0.0, y: 0.0, width: CGFloat(width), height: CGFloat(height)))
        return true
    }
    return didDraw ? intensities : nil
}
And here is my code for UICollectionViewCell setup:
/// Dequeues a `FilterCell` and fills it with `inputCIImage` rendered through the
/// LUT stored at `lutPaths[indexPath.item]`.
///
/// Any failure along the way (missing file, malformed LUT, filter or render
/// failure) is logged and the cell is returned without a new image instead of
/// crashing.
func collectionView(_ collectionView: UICollectionView, cellForItemAt indexPath: IndexPath) -> UICollectionViewCell {
    let lutPath = self.lutPaths[indexPath.item]
    // A wrong cell class for this identifier is a programmer error, so the
    // forced cast is kept.
    let cell = collectionView.dequeueReusableCell(withReuseIdentifier: "FilterCell", for: indexPath) as! FilterCell

    guard let lutImage = UIImage(contentsOfFile: lutPath) else {
        NSLog("LUT not found at \(indexPath.item)")
        return cell
    }
    guard let lutData = ColorCubeHelper.colorCubeDataFromLUTPNGImage(lutImage, lutSize: 64) else {
        NSLog("Invalid LUT at \(indexPath.item)")
        return cell
    }
    guard let renderer = CIFilter(name: "CIColorCube") else {
        NSLog("CIColorCube filter unavailable")
        return cell
    }

    renderer.setValue(lutData, forKey: "inputCubeData")
    renderer.setValue(64, forKey: "inputCubeDimension")
    renderer.setValue(inputCIImage, forKey: kCIInputImageKey)

    guard let outputImage = renderer.outputImage,
          let cgImage = self.ciContext.createCGImage(outputImage, from: outputImage.extent) else {
        NSLog("Failed to render LUT at \(indexPath.item)")
        return cell
    }
    cell.configure(image: UIImage(cgImage: cgImage))
    return cell
}
I finally got it working by using vDSP functions, which make cubeData generation super fast: so fast that scrolling is smooth on an iPhone X without using any background queues for loading textures!
/// Produces 64-point colour cube data from a LUT image.
///
/// Forwards the image's `cgImage` to the `CGImage` overload.
/// - Returns: The cube data, or `nil` when the image has no `cgImage` or the
///   overload rejects it.
public static func cubeDataForLut64(_ lutImage: NSImage) -> Data? {
    return lutImage.cgImage.flatMap { backingImage in
        cubeDataForLut64(backingImage)
    }
}
/// Builds 64 x 64 x 64 RGBA float cube data from a tiled LUT image.
///
/// The image must be a grid of 64 tiles, each 64 x 64 pixels, laid out row by
/// row. Tile index becomes the slowest-varying (z) axis of the cube and each
/// 64-pixel row of a tile is copied as one contiguous run.
///
/// - Parameter lutImage: The LUT image.
/// - Returns: `64³ * 4` floats scaled by 1/255 wrapped in `Data`, or `nil` when
///   the image dimensions do not describe exactly 64 tiles of 64 x 64 pixels.
private static func cubeDataForLut64(_ lutImage: CGImage) -> Data? {
    let cubeDimension = 64
    let floatCount = cubeDimension * cubeDimension * cubeDimension * 4
    let cubeSize = floatCount * MemoryLayout<Float>.size
    let imageWidth = lutImage.width
    let imageHeight = lutImage.height
    let rowCount = imageHeight / cubeDimension
    let columnCount = imageWidth / cubeDimension
    // All three conditions must hold; otherwise the loops below would read past
    // the bitmap or write past the cube.
    guard imageWidth % cubeDimension == 0,
          imageHeight % cubeDimension == 0,
          rowCount * columnCount == cubeDimension else {
        print("Invalid LUT")
        return nil
    }

    // The helper returns a malloc'ed buffer that this function owns.
    let bitmapData = createRGBABitmapFromImage(lutImage)
    defer { free(bitmapData) }

    // `allocate(capacity:)` counts Float elements, not bytes.
    let cubeData = UnsafeMutablePointer<Float>.allocate(capacity: floatCount)
    let floatsPerTileRow = cubeDimension * 4
    var divider: Float = 255.0
    var bitmapOffset = 0
    var z = 0
    for _ in 0 ..< rowCount { // ROW
        for y in 0 ..< cubeDimension {
            // Each pixel row of the image crosses every tile in the current tile
            // row, so z advances per tile and is rewound for the next pixel row.
            let firstTileInRow = z
            for _ in 0 ..< columnCount { // COLUMN
                let dataOffset = (z * cubeDimension * cubeDimension + y * cubeDimension) * 4
                // Scale one tile row (64 RGBA pixels) from 0...255 to 0...1 and
                // store it at its place in the cube.
                vDSP_vsdiv(bitmapData + bitmapOffset, 1,
                           &divider,
                           cubeData + dataOffset, 1,
                           vDSP_Length(floatsPerTileRow))
                bitmapOffset += floatsPerTileRow
                z += 1
            }
            z = firstTileInRow
        }
        z += columnCount
    }

    // Memory obtained from `allocate` must be released with `deallocate`.
    return Data(bytesNoCopy: cubeData,
                count: cubeSize,
                deallocator: .custom { _, _ in cubeData.deallocate() })
}
/// Rasterises `image` as 8-bit RGBA and returns every byte widened to a `Float`.
///
/// The image is drawn into a device-RGB bitmap with premultiplied "alpha last"
/// pixels (four bytes per pixel, `width * 4` bytes per row); each byte is then
/// converted to a single-precision float with the same numeric value (0...255).
///
/// - Parameter image: The image to rasterise.
/// - Returns: A `malloc`-allocated buffer of `width * height * 4` floats. The
///   caller owns the buffer and must release it with `free`. If the bitmap
///   context cannot be created the buffer contains zeros.
fileprivate static func createRGBABitmapFromImage(_ image: CGImage) -> UnsafeMutablePointer<Float> {
    let bitsPerComponent = 8
    let bytesPerPixel = 4 // RGBA
    let imageWidth = image.width
    let imageHeight = image.height
    let bitmapBytesPerRow = imageWidth * bytesPerPixel
    let bitmapByteCount = bitmapBytesPerRow * imageHeight

    // Request at least one element so a zero-sized request cannot come back as
    // a nil pointer and be mistaken for an allocation failure.
    guard let rawFloats = malloc(max(bitmapByteCount, 1) * MemoryLayout<Float>.size) else {
        fatalError("Out of memory while allocating the LUT bitmap")
    }
    let convertedBitmap = rawFloats.bindMemory(to: Float.self, capacity: max(bitmapByteCount, 1))

    // A zero-filled Swift array replaces the raw malloc'ed byte buffer: it is
    // released automatically and never exposes uninitialised memory, even if
    // the context below cannot be created.
    var bitmapBytes = [UInt8](repeating: 0, count: bitmapByteCount)
    bitmapBytes.withUnsafeMutableBufferPointer { buffer in
        guard let baseAddress = buffer.baseAddress else { return }
        let context = CGContext(data: baseAddress,
                                width: imageWidth,
                                height: imageHeight,
                                bitsPerComponent: bitsPerComponent,
                                bytesPerRow: bitmapBytesPerRow,
                                space: CGColorSpaceCreateDeviceRGB(),
                                bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
        context?.draw(image, in: CGRect(x: 0, y: 0, width: imageWidth, height: imageHeight))
        // Convert the UInt8 bytes to single-precision floats.
        vDSP_vfltu8(baseAddress, 1, convertedBitmap, 1, vDSP_Length(buffer.count))
    }
    return convertedBitmap
}