I'm attempting to decode raw H.264 from a network stream using the Media Foundation Transform CLSID_MSH264DecoderMFT. Setting up the transform seems to work and it's accepting data. However, no matter how much data I provide, it always returns MF_E_TRANSFORM_NEED_MORE_INPUT.
The documentation says that the decoder will skip over all data until it finds valid sequence and picture parameter sets. I'm providing those, followed by a raw data frame, each prefixed with a start code:
00 00 00 01 67 42 c0 28 da 01 e0 19 fe 7c 05 a8 08 08 0a 00 00 03 00 02 00 00 03 00 61 1e 30 65
40 00 00 00 01 68 ce 3c 80 00 00 00 01 00 00 0e 6c 41 9a e0 eb 08 84 3c 14 ff fe 10 ff f8 64 14
f0 88 20 11 55 d5 7e 19 11 17 17 c5 c5 3f 05 00 a3 86 41 08 8a ae ab 58 8c 1f 11 88 cd f8 9f ff
f8 9d 78 21 f9 2a bf e2 3e 04 1f f8 20 08 92 7c 0e 33 52 67 e1 48 74 32 f8 5c 5f ca fd 77 12 df
3a 0f 93 11 89 2f 26 98 76 16 65 9b 78 87 77 ff ff fe 27 c6 fe b1 39 34 27 04 17 55 f0 61 fe 23
The above is only a partial sample, but it's representative of the data I provide to the transform.
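For reference, a minimal sketch (not part of my actual setup code) that walks a buffer and logs the NAL unit type after each 4-byte start code; type 7 is the SPS (first byte 0x67 above), type 8 the PPS (0x68), and type 5 an IDR slice:
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Logs the NAL unit type that follows each 4-byte start code in a buffer.
void LogNalUnitTypes(const uint8_t* data, size_t size)
{
    for (size_t i = 0; i + 4 < size; ++i) {
        if (data[i] == 0x00 && data[i + 1] == 0x00 &&
            data[i + 2] == 0x00 && data[i + 3] == 0x01) {
            const int nalType = data[i + 4] & 0x1F; // low five bits of the NAL header
            printf("NAL unit at offset %zu, type %d\n", i, nalType);
            i += 3; // skip the rest of this start code
        }
    }
}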
Transform Setup:
ComPtr<IUnknown> pUnknown = nullptr;
HRESULT hResult = CoCreateInstance(CLSID_MSH264DecoderMFT, nullptr, CLSCTX_INPROC_SERVER, IID_IUnknown, &pUnknown);
if (S_OK != hResult) {
LogError("Failed to create H264 decoder");
return false;
}
hResult = pUnknown->QueryInterface(IID_PPV_ARGS(&mVideoDecoder));
if (hResult != S_OK) {
LogError("Failed to create H264 decoder");
return false;
}
ComPtr<IMFMediaType> pInputMediaType = nullptr;
hResult = MFCreateMediaType(&pInputMediaType);
if (S_OK != hResult) {
return false;
}
pInputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
pInputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
std::shared_ptr<VideoMp4Track> videoTrack = mDemuxer->getVideoTrack();
uint32_t width = videoTrack->getWidth();
uint32_t height = videoTrack->getHeight();
MFSetAttributeSize(pInputMediaType.Get(), MF_MT_FRAME_SIZE, width, height);
MFSetAttributeRatio(pInputMediaType.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); // square pixels
MFSetAttributeRatio(pInputMediaType.Get(), MF_MT_FRAME_RATE, videoTrack->getFrameRate(), 1);
pInputMediaType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_MixedInterlaceOrProgressive);
ComPtr<IMFAttributes> attributes;
mVideoDecoder->GetAttributes(&attributes);
hResult = attributes->SetUINT32(CODECAPI_AVLowLatencyMode, 1);
if (hResult != S_OK) {
LogError("Failed to set low latency mode. Video might be choppy.");
}
hResult = attributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, 1);
if (hResult != S_OK) {
LogError("Failed to set GPU acceleration. Video might be choppy.");
}
hResult = mVideoDecoder->SetInputType(0, pInputMediaType.Get(), 0);
if (hResult != S_OK) {
LogError("Failed to set input type for decoder");
return false;
}
ComPtr<IMFMediaType> pOutputType = nullptr;
hResult = MFCreateMediaType(&pOutputType);
if (S_OK != hResult) {
return false;
}
pOutputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
pOutputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
MFSetAttributeSize(pOutputType.Get(), MF_MT_FRAME_SIZE, width, height);
MFSetAttributeRatio(pOutputType.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); // square pixels
MFSetAttributeRatio(pOutputType.Get(), MF_MT_FRAME_RATE, videoTrack->getFrameRate(), 1);
hResult = mVideoDecoder->SetOutputType(0, pOutputType.Get(), 0);
if (hResult != S_OK) {
LogError("Failed to set input type for decoder");
return false;
}
// Reset the decoder and notify it that streaming is about to begin.
hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
if (S_OK != hResult) {
LogError("Failed to send flush command to the decoder.");
return false;
}
hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
if (S_OK != hResult) {
LogError("Failed to send notify command to the decoder.");
return false;
}
hResult = mVideoDecoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
if (S_OK != hResult) {
LogError("Failed to send notify command to the decoder.");
return false;
}
I have no idea why it isn't able to decode; I'd appreciate any help.
Thanks.
Edit:
DataPtr transformData = MakeDataPtr();
uint32_t startCode = 0x01000000; // stored as 00 00 00 01 on little-endian machines
std::shared_ptr<VideoMp4Track> video = mImpl->mDemuxer->getVideoTrack();
transformData->appendBytes(&startCode, 4);
DataPtr sps = video->getSequenceParameters();
transformData->appendData(*sps);
transformData->appendBytes(&startCode, 4);
DataPtr pps = video->getPictureParameters();
transformData->appendData(*pps);
transformData->appendBytes(&startCode, 4);
transformData->appendData(*sampleData);
transformData->appendBytes(&startCode, 4);
ComPtr<IMFSample> pSample = mImpl->createMFSample(transformData->getBytes(), transformData->getSize());
if (nullptr == pSample) {
LogError("Failed to create the buffer for decoder input");
return nullptr;
}
HRESULT hResult = mImpl->mVideoDecoder->ProcessInput(0, pSample.Get(), 0);
if (hResult != S_OK) {
if (hResult == MF_E_NOTACCEPTING) {
mImpl->mVideoDecoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
hResult = mImpl->mVideoDecoder->ProcessInput(0, pSample.Get(), 0);
}
else {
LogError("Error feeding to resampler...");
return nullptr;
}
}
DWORD dwStatus = 0;
// ProcessOutput expects a caller-provided sample, so create one with a backing buffer.
MFT_OUTPUT_DATA_BUFFER outputDataBuffer{};
ComPtr<IMFSample> pVideoSample = nullptr;
hResult = MFCreateSample(&pVideoSample);
if (S_OK != hResult) {
LogError("Failed to create a media sample for decoder output");
return nullptr;
}
ComPtr<IMFMediaBuffer> pOutputBuffer = nullptr;
hResult = MFCreateMemoryBuffer(sampleData->getSize(), &pOutputBuffer);
if (S_OK != hResult) {
LogError("Failed to create a memory buffer for decoder output");
return nullptr;
}
pVideoSample->AddBuffer(pOutputBuffer.Get());
outputDataBuffer.pSample = pVideoSample.Get();
do {
hResult = mImpl->mVideoDecoder->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
if (hResult == MF_E_TRANSFORM_NEED_MORE_INPUT) {
// The decoder wants more input before it will produce a frame.
break;
}
I've omitted the rest because it never gets any further; it just stays in this loop, with the transform asking for more input no matter how much I feed it.
Edit 2:
(Not) Working sample on GitHub
https://github.com/pma07pg/h264
The sample code was too large to dump here, so I've put the main.cpp on GitHub. You should be able to drop it into a VS project and run it right off the bat.
There are a few bugs in your code.
1.) You didn't account for the start code size
yours:
const uint32_t parameterInputSize = sizeof(pictureParameters) + sizeof(sequenceParameters);
mine:
const uint32_t parameterInputSize = sizeof(startCode) + sizeof(pictureParameters) + sizeof(startCode) + sizeof(sequenceParameters);
Each start code is four bytes, so the original computation comes up eight bytes short.
2.) Your 'mdat's contain more than one access unit. Each access unit is prefixed with its length, which you have to replace with a start code.
Your 'mdat':
'mdat' = <size> data[0] | <size> data[1] | ... | <size> data[n] |
Replace each size with a start code and split the combined payload into individual access units, as in the sketch after the layout below.
Required decoder input:
00 00 00 01 data[0]
00 00 00 01 data[1]
...
00 00 00 01 data[n]
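A minimal sketch of that conversion, assuming the usual 4-byte big-endian length prefixes (check lengthSizeMinusOne in your 'avcC' box if yours differ); the function name and types are mine, not from your code:
#include <cstddef>
#include <cstdint>
#include <vector>

// Splits a length-prefixed 'mdat' payload into Annex B access units,
// replacing each 4-byte big-endian length prefix with a start code.
std::vector<std::vector<uint8_t>> MdatToAnnexB(const uint8_t* mdat, size_t size)
{
    static const uint8_t kStartCode[4] = { 0x00, 0x00, 0x00, 0x01 };
    std::vector<std::vector<uint8_t>> accessUnits;
    size_t offset = 0;
    while (offset + 4 <= size) {
        const uint32_t length = (static_cast<uint32_t>(mdat[offset]) << 24) |
                                (static_cast<uint32_t>(mdat[offset + 1]) << 16) |
                                (static_cast<uint32_t>(mdat[offset + 2]) << 8) |
                                 static_cast<uint32_t>(mdat[offset + 3]);
        offset += 4;
        if (length == 0 || length > size - offset) {
            break; // malformed or truncated payload
        }
        std::vector<uint8_t> unit;
        unit.reserve(sizeof(kStartCode) + length);
        unit.insert(unit.end(), kStartCode, kStartCode + sizeof(kStartCode)); // start code in place of the length
        unit.insert(unit.end(), mdat + offset, mdat + offset + length);       // the access unit payload
        accessUnits.push_back(std::move(unit));
        offset += length;
    }
    return accessUnits;
}

Feed each returned unit to ProcessInput as its own IMFSample instead of pushing the whole 'mdat' in one buffer.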
See details here: https://github.com/go4shoe/MedieFoundationExample