Search code examples
c++directxh.264ms-media-foundation

IMFTransform::ProcessInput() "The buffer was too small to carry out the requested action."


I am trying to encode a texture with IMFTransform to H264. I can write and encode textures no problem to a file with SinkWriter and play the video and everything, works great. But I am trying to learn how to use IMFTransform so I can access the encoded IMFSamples themselves.

Unfortunately, I didn't end up getting too far because ProcessInput is failing with "The buffer was too small to carry out the requested action." as the HRESULT.

I have no clue which "buffer" it is referring to, and doing a search of that error turns up absolutely no results. No other calls return a bad HRESULT except ProcessInput(), and SinkWriter works fine. So I have absolutely ZERO clue what the problem is.

#include "main.h"
#include "WinDesktopDup.h"
#include <iostream>
#include <wmcodecdsp.h>

WinDesktopDup dup;

void SetupDpiAwareness()
{
    if (!SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_SYSTEM_AWARE))
        printf("SetProcessDpiAwarenessContext failed\n");
}

const UINT32 VIDEO_WIDTH = 3840;
const UINT32 VIDEO_HEIGHT = 2160;
const UINT32 VIDEO_FPS = 120;
const UINT64 VIDEO_FRAME_DURATION = 10 * 1000 * 1000 / VIDEO_FPS;
const UINT32 VIDEO_BIT_RATE = 800000;
const GUID   VIDEO_ENCODING_FORMAT = MFVideoFormat_H264;
const GUID   VIDEO_INPUT_FORMAT = MFVideoFormat_ARGB32;
const UINT32 VIDEO_PELS = VIDEO_WIDTH * VIDEO_HEIGHT;
const UINT32 VIDEO_FRAME_COUNT = 20 * VIDEO_FPS;

template <class T>
void SafeRelease(T** ppT) {
    if (*ppT) {
        (*ppT)->Release();
        *ppT = NULL;
    }
}

bool usingEncoder;
IMFMediaType* pMediaTypeOut = NULL;
IMFMediaType* pMediaTypeIn = NULL;
HRESULT SetMediaType()
{
    // Set the output media type.
    HRESULT hr = MFCreateMediaType(&pMediaTypeOut);
    if (!SUCCEEDED(hr)) { printf("MFCreateMediaType failed\n"); }
    hr = pMediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (!SUCCEEDED(hr)) { printf("SetGUID failed\n"); }
    hr = pMediaTypeOut->SetGUID(MF_MT_SUBTYPE, VIDEO_ENCODING_FORMAT);
    if (!SUCCEEDED(hr)) { printf("SetGUID (2) failed\n"); }
    hr = pMediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, VIDEO_BIT_RATE);
    if (!SUCCEEDED(hr)) { printf("SetUINT32 (3) failed\n"); }
    hr = pMediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    if (!SUCCEEDED(hr)) { printf("SetUINT32 (4) failed\n"); }
    hr = MFSetAttributeSize(pMediaTypeOut, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeSize failed\n"); }
    hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeRatio failed\n"); }
    hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeRatio (2) failed\n"); }
    

    // Set the input media type.
    hr = MFCreateMediaType(&pMediaTypeIn);
    if (!SUCCEEDED(hr)) { printf("MFCreateMediaType failed\n"); }
    hr = pMediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (!SUCCEEDED(hr)) { printf("SetGUID (3) failed\n"); }
    hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, VIDEO_INPUT_FORMAT);
    if (!SUCCEEDED(hr)) { printf("SetGUID (4) failed\n"); }
    hr = pMediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    if (!SUCCEEDED(hr)) { printf("SetUINT32 (5) failed\n"); }
    hr = MFSetAttributeSize(pMediaTypeIn, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeSize (2) failed\n"); }
    hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeRatio (3) failed\n"); }
    hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    if (!SUCCEEDED(hr)) { printf("MFSetAttributeRatio (4) failed\n"); }
    
    return hr;
}

HRESULT InitializeSinkWriter(IMFSinkWriter** ppWriter, DWORD* pStreamIndex)
{
    IMFDXGIDeviceManager* pDeviceManager = NULL;
    UINT                  resetToken;
    IMFAttributes* attributes;

    *ppWriter = NULL;
    *pStreamIndex = NULL;

    IMFSinkWriter* pSinkWriter = NULL;
    
    DWORD          streamIndex;

    HRESULT hr = MFCreateDXGIDeviceManager(&resetToken, &pDeviceManager);
    if (!SUCCEEDED(hr)) { printf("MFCreateDXGIDeviceManager failed\n"); }
    hr = pDeviceManager->ResetDevice(dup.D3DDevice, resetToken);
    if (!SUCCEEDED(hr)) { printf("ResetDevice failed\n"); }

    hr = MFCreateAttributes(&attributes, 3);
    if (!SUCCEEDED(hr)) { printf("MFCreateAttributes failed\n"); }
    hr = attributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1);
    if (!SUCCEEDED(hr)) { printf("SetUINT32 failed\n"); }
    hr = attributes->SetUINT32(MF_LOW_LATENCY, 1);
    if (!SUCCEEDED(hr)) { printf("SetUINT32 (2) failed\n"); }
    hr = attributes->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, pDeviceManager);
    if (!SUCCEEDED(hr)) { printf("SetUnknown failed\n"); }
    hr = MFCreateSinkWriterFromURL(L"output.mp4", NULL, attributes, &pSinkWriter);
    if (!SUCCEEDED(hr)) { printf("MFCreateSinkWriterFromURL failed\n"); }

    hr = pSinkWriter->AddStream(pMediaTypeOut, &streamIndex);
    if (!SUCCEEDED(hr)) { printf("AddStream failed\n"); }

    hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
    if (!SUCCEEDED(hr)) { printf("SetInputMediaType failed\n"); }

    // Tell the sink writer to start accepting data.
    hr = pSinkWriter->BeginWriting();
    if (!SUCCEEDED(hr)) { printf("BeginWriting failed\n"); }

    // Return the pointer to the caller.
    *ppWriter = pSinkWriter;
    (*ppWriter)->AddRef();
    *pStreamIndex = streamIndex;

    SafeRelease(&pSinkWriter);
    SafeRelease(&pMediaTypeOut);
    SafeRelease(&pMediaTypeIn);
    return hr;
}

IUnknown* _transformUnk;
IMFTransform* pMFTransform;
HRESULT InitializeEncoder(DWORD* pStreamIndex)
{
    HRESULT hr = CoCreateInstance(CLSID_CMSH264EncoderMFT, NULL, CLSCTX_INPROC_SERVER, IID_IUnknown, (void**)&_transformUnk);
    if (!SUCCEEDED(hr)) { printf("CoCreateInstance failed\n"); }
    hr = _transformUnk->QueryInterface(IID_PPV_ARGS(&pMFTransform));
    if (!SUCCEEDED(hr)) { printf("QueryInterface failed\n"); }
    
    hr = pMFTransform->SetOutputType(0, pMediaTypeOut, 0);
    if (!SUCCEEDED(hr)) { printf("SetOutputType failed\n"); }

    hr = pMFTransform->SetInputType(0, pMediaTypeIn, 0);
    if (!SUCCEEDED(hr)) { printf("SetInputType failed\n"); }


    DWORD mftStatus = 0;
    hr = pMFTransform->GetInputStatus(0, &mftStatus);
    if (!SUCCEEDED(hr)) { printf("GetInputStatus failed\n"); }

    if (MFT_INPUT_STATUS_ACCEPT_DATA != mftStatus)
        printf("MFT_INPUT_STATUS_ACCEPT_DATA\n");

    hr = pMFTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
    if (!SUCCEEDED(hr)) { printf("MFT_MESSAGE_NOTIFY_BEGIN_STREAMING failed\n"); }
    hr = pMFTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
    if (!SUCCEEDED(hr)) { printf("MFT_MESSAGE_NOTIFY_START_OF_STREAM failed\n"); }

    SafeRelease(&pSinkWriter);
    SafeRelease(&pMediaTypeOut);
    SafeRelease(&pMediaTypeIn);
    return hr;
}

ID3D11Texture2D* texture;

HRESULT WriteFrame(IMFSinkWriter* pWriter, DWORD streamIndex, const LONGLONG& rtStart)
{
    IMFSample* pSample = NULL;
    IMFMediaBuffer* pBuffer = NULL;

    HRESULT hr;
    
    hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), texture, 0, false, &pBuffer);
    if (!SUCCEEDED(hr)) { printf("MFCreateDXGISurfaceBuffer failed\n"); }

    DWORD len;
    hr = ((IMF2DBuffer*)pBuffer)->GetContiguousLength(&len);
    if (!SUCCEEDED(hr)) { printf("GetContiguousLength failed\n"); }

    hr = pBuffer->SetCurrentLength(len);
    if (!SUCCEEDED(hr)) { printf("SetCurrentLength failed\n"); }

    // Create a media sample and add the buffer to the sample.
    hr = MFCreateSample(&pSample);
    if (!SUCCEEDED(hr)) { printf("MFCreateSample failed\n"); }

    hr = pSample->AddBuffer(pBuffer);
    if (!SUCCEEDED(hr)) { printf("AddBuffer failed\n"); }

    // Set the time stamp and the duration.
    hr = pSample->SetSampleTime(rtStart);
    if (!SUCCEEDED(hr)) { printf("SetSampleTime failed\n"); }

    hr = pSample->SetSampleDuration(VIDEO_FRAME_DURATION);
    if (!SUCCEEDED(hr)) { printf("SetSampleDuration failed\n"); }

    // Send the sample to the Sink Writer or Encoder.

    if (!usingEncoder)
    {
        hr = pWriter->WriteSample(streamIndex, pSample);
        if (!SUCCEEDED(hr)) { printf("WriteSample failed\n"); }
    }
    else
    {
        hr = pMFTransform->ProcessInput(0, pSample, 0);
        if (!SUCCEEDED(hr)) { printf("ProcessInput failed\n"); }
    }
    
    SafeRelease(&pSample);
    SafeRelease(&pBuffer);
    return hr;
}

int APIENTRY main(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
    SetupDpiAwareness();
    auto err = dup.Initialize();

    // Initialize MF
    CoInitializeEx(0, COINIT_APARTMENTTHREADED); // Need to call this once when a thread is using COM or it wont work
    MFStartup(MF_VERSION);                       // Need to call this too for Media Foundation related memes

    IMFSinkWriter* pSinkWriter = NULL;
    DWORD          stream = 0;
    LONGLONG       rtStart = 0;

    usingEncoder = true; // True if we want to encode with IMFTransform, false if we want to write with SinkWriter
    
    HRESULT        hr = SetMediaType();
    if (!SUCCEEDED(hr)) { printf("SetMediaType failed\n"); }

    if (!usingEncoder)
    {
        hr = InitializeSinkWriter(&pSinkWriter, &stream);
        if (!SUCCEEDED(hr)) { printf("InitializeSinkWriter failed\n"); }
    }
    else
    {
        hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV); // Using MFVideoFormat_ARGB32 causes SetInputType() to fail
        hr = InitializeEncoder(&stream);
        if (!SUCCEEDED(hr)) { printf("InitializeEncoder failed\n"); }
    }
    
    const int CAPTURE_LENGTH = 10;

    int total_frames = VIDEO_FPS * CAPTURE_LENGTH;

    for (int i = 0; i < 1; i++)
    {
        texture = dup.CaptureNext();
        if (texture != nullptr)
        {
            hr = WriteFrame(pSinkWriter, stream, rtStart);
            if (!SUCCEEDED(hr))
                printf("WriteFrame failed\n");
            rtStart += VIDEO_FRAME_DURATION;
            texture->Release();
        }
        else
        {
            i--;
        }
    }

    if (FAILED(hr))
    {
        std::cout << "Failure" << std::endl;
    }

    if (SUCCEEDED(hr)) {
        hr = pSinkWriter->Finalize();
    }

    SafeRelease(&pSinkWriter);
    MFShutdown();
    CoUninitialize();
}

Solution

  • Here’s documentation for the Microsoft’s software CPU-based h.264 encoder you’re using in your code.

    It does not support MFVideoFormat_ARGB32 on input. It doesn’t support any RGB formats at all. That transform only supports YUV formats for the input video.

    BTW, if you replace the MFT with a hardware encoder, they’re very likely to expose same set of features as the Microsoft’s software one, I don’t think they support RGB. And, because all hardware transforms are asynchronous, you gonna need slightly different workflow to drive them directly.

    The reason sink writer works OK, it creates and hosts 2 MFTs under the hood, the format converter from RGB to YUV, another one is the encoder.

    You have following options.

    1. Use another MFT to convert RGBA to NV12 before passing frames to the encoder.

    2. Do that conversation yourself with pixel shaders (render a textured quad into 2 planes of NV12 texture using 2 different pixel shaders), or with a single compute shader (dispatch 1 thread for every 2x2 block of the video, write 6 bytes for every block, 4 into R8_UNORM output texture with brightness, other 2 bytes into R8G8_UNORM output texture with color data).

    3. Use a sink writer, but create it with MFCreateSinkWriterFromMediaSink API instead of MFCreateSinkWriterFromURL. Implement IMFMediaSink COM interface, also IMFStreamSink for it’s video stream, and the framework will call IMFStreamSink.ProcessSample giving you encoded video samples in system memory as soon as they’re available.