Search code examples
swiftmacosmetal

Compositing multiple CVPixelBuffers with alpha channels in a Metal render pass


I'm trying to draw multiple CVPixelBuffers in a Metal render pass. The CVPixelBuffers have an Alpha channel. I would expect that the top image would be overlayed so that it's alpha pixels are transparent, but solid pixels are opaque. However, in practice, my current code is just making the top image uniformly transparent (as if the entire image has an alpha of 0.5 or so).

I've played around with lots of permutations of the blend parameters on the MTLRenderPipelineDescriptor but I haven't reached the right thing yet.

Here's my current code (apologies for the length, but it's hard to get a complete example in Metal without a reasonable amount of boilerplate):

import SwiftUI
import MetalKit

struct ContentView: View {
    var body: some View {
        MetalViewRep()
            .frame(width: 300, height: 300)
            .padding()
    }
}

struct MetalViewRep: NSViewRepresentable {
    func makeNSView(context: Context) -> MetalCompositorView {
        MetalCompositorView()
    }
    
    func updateNSView(_ nsView: MetalCompositorView, context: Context) { }
}

final class MetalCompositorView: MTKView {
    private let commandQueue: MTLCommandQueue
    
    private var inputBuffers: [CVPixelBuffer] = [
        NSImage(named: "lion")?.pixelBuffer(),
        NSImage(named: "lesson")?.pixelBuffer()
    ].compactMap { $0 }
    private var vertexBuffer: MTLBuffer?
    private var pipelineState: MTLRenderPipelineState?
    
    private var textureCache: CVMetalTextureCache?
    
    private static var vertices: [Vertex] = [
        .init(position: .init(x: -1, y: -1), textureCoordinate: .init(x: 0, y: 1)),
        .init(position: .init(x: 1, y: -1), textureCoordinate: .init(x: 1, y: 1)),
        .init(position: .init(x: -1, y: 1), textureCoordinate: .init(x: 0, y: 0)),
        .init(position: .init(x: 1, y: 1), textureCoordinate: .init(x: 1, y: 0)),
    ]
    
    init() {
        let device = MTLCreateSystemDefaultDevice()!
        
        var textureCacheTemp: CVMetalTextureCache?
        guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, device, nil, &textureCacheTemp) == kCVReturnSuccess,
            let textureCache: CVMetalTextureCache = textureCacheTemp
        else {
            fatalError()
        }
        
        self.textureCache = textureCache
        commandQueue = device.makeCommandQueue()!
        super.init(frame: .zero, device: device)
        setupMetalPipeline()
        self.delegate = self
    }
    
    private func setupMetalPipeline() {
        guard let device else { fatalError() }
        
        let pipelineDescriptor = MTLRenderPipelineDescriptor()
        
        let library = device.makeDefaultLibrary()
        pipelineDescriptor.vertexFunction = library?.makeFunction(name: "vertexPassthrough")
        pipelineDescriptor.fragmentFunction = library?.makeFunction(name: "fragmentShader")
        
        pipelineDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
        pipelineDescriptor.colorAttachments[0].isBlendingEnabled = true
        pipelineDescriptor.colorAttachments[0].rgbBlendOperation = .add
        pipelineDescriptor.colorAttachments[0].alphaBlendOperation = .add
        pipelineDescriptor.colorAttachments[0].sourceAlphaBlendFactor = .sourceAlpha
        pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = .sourceColor
        pipelineDescriptor.colorAttachments[0].destinationAlphaBlendFactor = .one
        pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = .one
        
        do {
            self.pipelineState = try device.makeRenderPipelineState(descriptor: pipelineDescriptor)
        } catch {
            fatalError()
        }
        
        vertexBuffer = device.makeBuffer(bytes: Self.vertices, length: Self.vertices.count * MemoryLayout<Vertex>.stride, options: [])!
    }
    
    required init(coder: NSCoder) {
        fatalError("init(coder:) has not been implemented")
    }
}

extension MetalCompositorView: MTKViewDelegate {
    func mtkView(_: MTKView, drawableSizeWillChange _: CGSize) {}

    func draw(in view: MTKView) {
        guard let commandBuffer = commandQueue.makeCommandBuffer() else { return }
        guard let renderPassDescriptor = view.currentRenderPassDescriptor else { return }
        guard let renderCommandEncoder =
                commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor)
        else { return }
        
        guard let pipelineState, let vertexBuffer else {
            fatalError()
        }
        
        renderCommandEncoder.setRenderPipelineState(pipelineState)
        
        for inputBuffer in inputBuffers {
            let sourceTexture = makeTextureFromPixelBuffer(pixelBuffer: inputBuffer)
            renderCommandEncoder.setFragmentTexture(sourceTexture, index: 0)
            renderCommandEncoder.setVertexBuffer(vertexBuffer, offset: 0, index: 0)
            renderCommandEncoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
        }
        
        renderCommandEncoder.endEncoding()
        
        commandBuffer.present(view.currentDrawable!)
        commandBuffer.commit()
    }
    
    func makeTextureFromPixelBuffer(pixelBuffer: CVPixelBuffer) -> MTLTexture {
        guard let textureCache else {
            fatalError()
        }
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)
        let format: MTLPixelFormat = .bgra8Unorm
        var metalTexture: CVMetalTexture?

        let status = CVMetalTextureCacheCreateTextureFromImage(nil,
                                                               textureCache,
                                                               pixelBuffer,
                                                               nil,
                                                               format,
                                                               width,
                                                               height,
                                                               0,
                                                               &metalTexture)

        guard status == kCVReturnSuccess,
            let unwrappedMetalTexture: CVMetalTexture = metalTexture,
            let texture: MTLTexture = CVMetalTextureGetTexture(unwrappedMetalTexture)
        else {
            fatalError()
        }

        return texture
    }
}

/// Extension to turn an `NSImage` into a `CVPixelBuffer`
extension NSImage {
    func pixelBuffer() -> CVPixelBuffer? {
        let width = self.size.width
        let height = self.size.height
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferIOSurfaceCoreAnimationCompatibilityKey: kCFBooleanTrue,
                       kCVPixelBufferMetalCompatibilityKey: kCFBooleanTrue,
        ] as CFDictionary
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                         Int(width),
                                         Int(height),
                                         kCVPixelFormatType_32BGRA,
                                         attrs,
                                         &pixelBuffer)
        
        guard let resultPixelBuffer = pixelBuffer, status == kCVReturnSuccess else {
            return nil
        }
        
        CVPixelBufferLockBaseAddress(resultPixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        let pixelData = CVPixelBufferGetBaseAddress(resultPixelBuffer)
        
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(data: pixelData,
                                      width: Int(width),
                                      height: Int(height),
                                      bitsPerComponent: 8,
                                      bytesPerRow: CVPixelBufferGetBytesPerRow(resultPixelBuffer),
                                      space: rgbColorSpace,
                                      bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) else {return nil}
        
        let graphicsContext = NSGraphicsContext(cgContext: context, flipped: false)
        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = graphicsContext
        draw(in: CGRect(x: 0, y: 0, width: width, height: height))
        NSGraphicsContext.restoreGraphicsState()
        
        CVPixelBufferUnlockBaseAddress(resultPixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        
        return resultPixelBuffer
    }
}

And the Metal code itself:

#ifndef ShaderDefinitions_h
#define ShaderDefinitions_h

#include <simd/simd.h>

typedef struct {
    packed_float2 position;
    packed_float2 textureCoordinate;
} Vertex;

#endif /* ShaderDefinitions_h */
#include <metal_stdlib>
using namespace metal;

#include "ShaderDefinitions.h"

typedef struct {
    float4 position[[position]];
    float2 textureCoordinate;
} VertexOut;

vertex VertexOut vertexPassthrough(constant Vertex *vertices[[buffer(0)]],
                                  unsigned int vid[[vertex_id]]) {
    VertexOut out;
    constant Vertex &v = vertices[vid];
    out.position = float4(float2(v.position), 0.0, 1.0); // x, y, z, w coordinate on the texture
    out.textureCoordinate = v.textureCoordinate;
    return out;
}

fragment float4 fragmentShader(
                               VertexOut in[[stage_in]],
                               texture2d<float, access::sample> texture [[ texture(0) ]]
                               )
{
    constexpr sampler qsampler;
    float4 color = texture.sample(qsampler, in.textureCoordinate);
    return color.bgra;
}

In my actual app, there could be an arbitrary number of CVPixelBuffers coming in, so I don't believe a solution where I just pass each buffer as a parameter to the fragmentShader will work, since I don't know the number ahead of time.

Question:

How can I render the images so that opaque areas remain opaque but transparent areas show through to the lower images?

Expected result:

enter image description here

Actual result:

enter image description here

Image assets:

enter image description here enter image description here


In response to the comments, wat the GPU debugger sees with the "lesson" texture: enter image description here


Solution

  • I figured it out with help from the follow website, which makes it easy to quickly visualize some of the differences with blending options:

    https://www.andersriggelsen.dk/glblendfunc.php

    The site is geared towards OpenGL, but the general principals apply to Metal as well

    Here are the working blend options:

    pipelineDescriptor.colorAttachments[0].isBlendingEnabled = true
    pipelineDescriptor.colorAttachments[0].rgbBlendOperation = .add
    pipelineDescriptor.colorAttachments[0].alphaBlendOperation = .add
    pipelineDescriptor.colorAttachments[0].sourceAlphaBlendFactor = .one
    pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = .one
    pipelineDescriptor.colorAttachments[0].destinationAlphaBlendFactor = .one
    pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = .oneMinusSourceAlpha