
Decoding H264 Stream Always Returns MF_E_TRANSFORM_NEED_MORE_INPUT


I'm attempting to decode raw H.264 from a network stream using the Media Foundation transform CLSID_MSH264DecoderMFT. Setting up the transform seems to work, and it accepts the data I feed it. However, no matter how much data I provide, ProcessOutput always returns MF_E_TRANSFORM_NEED_MORE_INPUT.

The documentation says that the decoder will skip over all data until it finds valid Sequence and Picture Parameter Sets. I'm providing these, followed by a raw data frame, each prefixed with a start code:

    00 00 00 01 67 42 c0 28 da 01 e0 19 fe 7c 05 a8 08 08 0a 00 00 03 00 02 00 00 03 00 61 1e 30 65
    40 00 00 00 01 68 ce 3c 80 00 00 00 01 00 00 0e 6c 41 9a e0 eb 08 84 3c 14 ff fe 10 ff f8 64 14
    f0 88 20 11 55 d5 7e 19 11 17 17 c5 c5 3f 05 00 a3 86 41 08 8a ae ab 58 8c 1f 11 88 cd f8 9f ff
    f8 9d 78 21 f9 2a bf e2 3e 04 1f f8 20 08 92 7c 0e 33 52 67 e1 48 74 32 f8 5c 5f ca fd 77 12 df
    3a 0f 93 11 89 2f 26 98 76 16 65 9b 78 87 77 ff ff fe 27 c6 fe b1 39 34 27 04 17 55 f0 61 fe 23

Above is only a partial sample, but it's representative of the data I provide to the transform.

Transform Setup:

    ComPtr<IUnknown> pUnknown = nullptr;
    HRESULT hResult = CoCreateInstance(CLSID_MSH264DecoderMFT, nullptr, CLSCTX_INPROC_SERVER, IID_IUnknown, &pUnknown);
    if (S_OK != hResult) {
        LogError("Failed to create H264 decoder");
        return false;
    }

    hResult = pUnknown->QueryInterface(IID_PPV_ARGS(&mVideoDecoder));
    if (hResult != S_OK) {
        LogError("Failed to create H264 decoder");
        return false;
    }
    
    ComPtr<IMFMediaType> pInputMediaType = nullptr;
    hResult = MFCreateMediaType(&pInputMediaType);
    if (S_OK != hResult) {
        return false;
    }

    pInputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    pInputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);

    std::shared_ptr<VideoMp4Track> videoTrack = mDemuxer->getVideoTrack();

    uint32_t width = videoTrack->getWidth();
    uint32_t height = videoTrack->getHeight();
    MFSetAttributeSize(pInputMediaType.Get(), MF_MT_FRAME_SIZE, width, height);
    MFSetAttributeRatio(pInputMediaType.Get(), MF_MT_PIXEL_ASPECT_RATIO, width, height);
    MFSetAttributeRatio(pInputMediaType.Get(), MF_MT_FRAME_RATE, videoTrack->getFrameRate(), 1);
    pInputMediaType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_MixedInterlaceOrProgressive);

    ComPtr<IMFAttributes> attributes;
    mVideoDecoder->GetAttributes(&attributes);

    hResult = attributes->SetUINT32(CODECAPI_AVLowLatencyMode, 1);
    if (hResult != S_OK) {
        LogError("Failed to set low latency mode. Video might be choppy.");
    }

    hResult = attributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, 1);
    if (hResult != S_OK) {
        LogError("Failed to set GPU acceleration. Video might be choppy.");
    }

    hResult = mVideoDecoder->SetInputType(0, pInputMediaType.Get(), 0);
    if (hResult != S_OK) {
        LogError("Failed to set input type for decoder");
        return false;
    }

    ComPtr<IMFMediaType> pOutputType = nullptr;
    hResult = MFCreateMediaType(&pOutputType);
    if (S_OK != hResult) {
        return false;
    }

    pOutputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    pOutputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
    MFSetAttributeSize(pOutputType.Get(), MF_MT_FRAME_SIZE, width, height);
    MFSetAttributeRatio(pOutputType.Get(), MF_MT_PIXEL_ASPECT_RATIO, width, height);
    MFSetAttributeRatio(pOutputType.Get(), MF_MT_FRAME_RATE, videoTrack->getFrameRate(), 1);

    hResult = mVideoDecoder->SetOutputType(0, pOutputType.Get(), 0);
    if (hResult != S_OK) {
        LogError("Failed to set input type for decoder");
        return false;
    }

    // Flush the decoder, then notify it that streaming is about to begin.
    hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
    if (S_OK != hResult) {
        LogError("Failed to send flush command to the decoder.");
        return false;
    }

    hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
    if (S_OK != hResult) {
        LogError("Failed to send notify command to the decoder.");
        return false;
    }

    hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
    if (S_OK != hResult) {
        LogError("Failed to send notify command to the decoder.");
        return false;
    }
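
As a sanity check on the setup, here is a minimal sketch (reusing the mVideoDecoder member above) of querying the decoder's output stream info; cbSize reports how large each decoded NV12 sample must be, and the flags indicate whether the MFT allocates the output samples itself:

    MFT_OUTPUT_STREAM_INFO outputStreamInfo{};
    hResult = mVideoDecoder->GetOutputStreamInfo(0, &outputStreamInfo);
    if (S_OK == hResult) {
        if (0 == (outputStreamInfo.dwFlags & (MFT_OUTPUT_STREAM_PROVIDES_SAMPLES |
                                              MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES))) {
            // The caller must allocate each output sample with at least
            // outputStreamInfo.cbSize bytes (a full NV12 frame, not the
            // compressed input size).
        }
    }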

I have no idea why it isn't able to decode; I'd appreciate any help.

Thanks.

Edit:

    DataPtr transformData = MakeDataPtr();

    // 0x01000000 written out little-endian gives the Annex B start code bytes 00 00 00 01.
    uint32_t startCode = 0x01000000;

    std::shared_ptr<VideoMp4Track> video = mImpl->mDemuxer->getVideoTrack();

    // Sequence parameter set.
    transformData->appendBytes(&startCode, 4);
    DataPtr sps = video->getSequenceParameters();
    transformData->appendData(*sps);

    // Picture parameter set.
    transformData->appendBytes(&startCode, 4);
    DataPtr pps = video->getPictureParameters();
    transformData->appendData(*pps);

    // Frame data.
    transformData->appendBytes(&startCode, 4);
    transformData->appendData(*sampleData);

    transformData->appendBytes(&startCode, 4);

    ComPtr<IMFSample> pSample = mImpl->createMFSample(transformData->getBytes(), transformData->getSize());
    if (nullptr == pSample) {
        LogError("Failed to create the buffer for decoder input");
        return nullptr;
    }

    HRESULT hResult = mImpl->mVideoDecoder->ProcessInput(0, pSample.Get(), 0);
    if (hResult != S_OK) {
        if (hResult == MF_E_NOTACCEPTING) {
            mImpl->mVideoDecoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
            hResult = mImpl->mVideoDecoder->ProcessInput(0, pSample.Get(), 0);
        }
        else {
            LogError("Error feeding the decoder...");
            return nullptr;
        }
    }

    DWORD dwStatus = 0;
    // outputDataBuffer is empty, need to create it.
    MFT_OUTPUT_DATA_BUFFER outputDataBuffer{};
    ComPtr<IMFSample> pVideoSample = nullptr;
    hResult = MFCreateSample(&pVideoSample);
    if (S_OK != hResult) {
        LogError("Failed to create a media sample for decoder output");
        return nullptr;
    }

    ComPtr<IMFMediaBuffer> pOutputBuffer = nullptr;
    hResult = MFCreateMemoryBuffer(sampleData->getSize(), &pOutputBuffer);
    if (S_OK != hResult) {
        LogError("Failed to create a memory buffer for decoder output");
        return nullptr;
    }

    pVideoSample->AddBuffer(pOutputBuffer.Get());

    outputDataBuffer.pSample = pVideoSample.Get();
    do {
        hResult = mImpl->mVideoDecoder->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
        if (hResult == MF_E_TRANSFORM_NEED_MORE_INPUT) {
            // conversion end
            break;
        }

I've omitted the rest because the code never gets further; it just stays in this loop feeding more input to the transform.
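
For completeness, a minimal sketch of how the rest of such a loop is typically written, assuming the decoder reports MF_E_TRANSFORM_STREAM_CHANGE once it has parsed the parameter sets (the output type then has to be renegotiated before decoded frames appear):

        if (hResult == MF_E_TRANSFORM_STREAM_CHANGE) {
            // The decoder has parsed the SPS and wants the output type renegotiated.
            // (A real implementation would enumerate until it finds an NV12 type.)
            ComPtr<IMFMediaType> pNewOutputType;
            if (SUCCEEDED(mImpl->mVideoDecoder->GetOutputAvailableType(0, 0, &pNewOutputType))) {
                mImpl->mVideoDecoder->SetOutputType(0, pNewOutputType.Get(), 0);
            }
            continue; // Retry ProcessOutput with the renegotiated type.
        }
        if (FAILED(hResult)) {
            LogError("ProcessOutput failed");
            break;
        }
        // outputDataBuffer.pSample now holds one decoded NV12 frame.
    } while (true);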

Edit 2:

(Not) Working sample on github

https://github.com/pma07pg/h264

The sample code was too large to dump here, so I've put the main.cpp on GitHub. You should be able to drop it into a VS project and run it right off the bat.


Solution

  • There are a few bugs in your code.

    1.) You didn't account for the start code size

    yours:

    const uint32_t parameterInputSize = sizeof(pictureParameters) + sizeof(sequenceParameters);
    

    mine:

    const uint32_t parameterInputSize = sizeof(startCode) + sizeof(pictureParameters) + sizeof(startCode) + sizeof(sequenceParameters);
    

    2.) Your 'mdat's contain more than one access unit. Each access unit is prefixed with its length, which you have to replace with a start code.

    Your 'mdat':

    'mdat' = <size> data[0] | <size> data[1] | ... | <size> data[n] |
    

    Replace each length with a start code and split the combined payload into individual access units (see the sketch at the end of this answer).

    Required decoder input:

    00 00 00 01 data[0]
    00 00 00 01 data[1]
    ...
    00 00 00 01 data[n]
    

    See details here: https://github.com/go4shoe/MedieFoundationExample
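
    For illustration, a minimal sketch of that conversion, assuming 4-byte big-endian length prefixes (the usual case; the exact prefix width comes from the track's avcC configuration record):

        #include <cstddef>
        #include <cstdint>
        #include <vector>

        // Hypothetical helper: converts one length-prefixed MP4 sample into
        // Annex B form by replacing each 4-byte length with a 00 00 00 01 start code.
        std::vector<uint8_t> LengthPrefixedToAnnexB(const uint8_t* data, size_t size)
        {
            static const uint8_t kStartCode[4] = { 0x00, 0x00, 0x00, 0x01 };
            std::vector<uint8_t> annexB;
            size_t offset = 0;
            while (offset + 4 <= size) {
                // Lengths in MP4 samples are stored big-endian.
                const uint32_t nalSize = (uint32_t(data[offset])     << 24) |
                                         (uint32_t(data[offset + 1]) << 16) |
                                         (uint32_t(data[offset + 2]) << 8)  |
                                          uint32_t(data[offset + 3]);
                offset += 4;
                if (nalSize == 0 || nalSize > size - offset) {
                    break;  // Malformed or truncated input.
                }
                annexB.insert(annexB.end(), kStartCode, kStartCode + 4);
                annexB.insert(annexB.end(), data + offset, data + offset + nalSize);
                offset += nalSize;
            }
            return annexB;
        }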