Search code examples
c++ · audio · codec · ms-media-foundation

How to increase MP3 encoding quality (Media Foundation)?


I have a .wav file that I need to convert to .mp3, and I am using Media Foundation to do it. This is the approach I use:

#include "TV_AudioEncoderMF.h"

#include <windows.h>
#include <windowsx.h>

#include <atlstr.h>
#include <comdef.h>
#include <exception>

#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <mferror.h>
#include <Wmcodecdsp.h>

#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid")

// Constructs the encoder. No setup is performed here.
TV_AudioEncoderMF::TV_AudioEncoderMF() = default;


// Destroys the encoder. No teardown is performed here.
TV_AudioEncoderMF::~TV_AudioEncoderMF() = default;

// Releases the object that *ppT points to (by calling its Release() method)
// and resets *ppT to nullptr so the pointer cannot be released twice.
//
// ppT  Address of the pointer to release. A null address, or an address of
//      a pointer that is already null, is a no-op.
template <class T> void SafeRelease(T **ppT)
{
    // Guard the outer pointer as well: callers sometimes forward an optional
    // out-parameter here, and dereferencing a null T** would crash.
    if (ppT != nullptr && *ppT != nullptr)
    {
        (*ppT)->Release();
        *ppT = nullptr;
    }
}

// Searches the audio output types that the installed encoders offer for
// cAudioFormat and returns the first one matching the requested layout.
//
// cAudioFormat   Audio subtype GUID to look for (e.g. MFAudioFormat_MP3).
// cSampleRate    Required MF_MT_AUDIO_SAMPLES_PER_SECOND value.
// cBitPerSample  Required MF_MT_AUDIO_BITS_PER_SAMPLE value. Only enforced
//                for candidates that actually carry this attribute.
// cChannels      Required MF_MT_AUDIO_NUM_CHANNELS value.
// ppType         Receives the matching media type. The caller owns the
//                returned reference and must Release() it. Set to nullptr
//                on every failure path.
//
// Returns S_OK on a match, E_POINTER if ppType is null, MF_E_NOT_FOUND if no
// enumerated type matches, or the failing HRESULT of the underlying call.
HRESULT TV_AudioEncoderMF::GetOutputMediaTypes(
    GUID cAudioFormat,
    UINT32 cSampleRate,
    UINT32 cBitPerSample,
    UINT32 cChannels,
    IMFMediaType **ppType
)
{
    if (ppType == nullptr)
    {
        return E_POINTER;
    }
    *ppType = nullptr;

    // Enumerate all codecs except for codecs with field-of-use restrictions.
    // Sort the results.
    const DWORD dwFlags =
        (MFT_ENUM_FLAG_ALL & (~MFT_ENUM_FLAG_FIELDOFUSE)) |
        MFT_ENUM_FLAG_SORTANDFILTER;

    IMFCollection *pAvailableTypes = nullptr;   // List of audio media types.

    HRESULT hr = MFTranscodeGetAudioOutputAvailableTypes(
        cAudioFormat,
        dwFlags,
        nullptr,
        &pAvailableTypes
    );
    if (FAILED(hr))
    {
        return hr;
    }

    // Get the element count.
    DWORD dwMTCount = 0;
    hr = pAvailableTypes->GetElementCount(&dwMTCount);

    IMFMediaType *pMatch = nullptr;             // First matching type, if any.

    // Iterate through the results and check for the corresponding codec.
    for (DWORD i = 0; SUCCEEDED(hr) && pMatch == nullptr && i < dwMTCount; i++)
    {
        // GetElement hands back an IUnknown; obtain IMFMediaType through
        // QueryInterface instead of reinterpreting the pointer.
        IUnknown *pUnknown = nullptr;
        IMFMediaType *pCandidate = nullptr;

        hr = pAvailableTypes->GetElement(i, &pUnknown);
        if (SUCCEEDED(hr))
        {
            hr = pUnknown->QueryInterface(IID_PPV_ARGS(&pCandidate));
        }
        SafeRelease(&pUnknown);
        if (FAILED(hr))
        {
            break;
        }

        GUID majorType = {};
        GUID subType = {};
        UINT32 sampleRate = 0;
        UINT32 channels = 0;
        UINT32 bitsPerSample = 0;

        // NOTE(review): compressed output types may not carry
        // MF_MT_AUDIO_BITS_PER_SAMPLE; when the attribute is absent it cannot
        // be compared, so it is not used to reject the candidate.
        const bool hasBitsPerSample = SUCCEEDED(
            pCandidate->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &bitsPerSample));

        // Compare against the format the caller asked for (not a fixed
        // subtype), and skip candidates missing a mandatory attribute.
        const bool matches =
            SUCCEEDED(pCandidate->GetMajorType(&majorType))
            && SUCCEEDED(pCandidate->GetGUID(MF_MT_SUBTYPE, &subType))
            && majorType == MFMediaType_Audio
            && subType == cAudioFormat
            && SUCCEEDED(pCandidate->GetUINT32(
                   MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate))
            && SUCCEEDED(pCandidate->GetUINT32(
                   MF_MT_AUDIO_NUM_CHANNELS, &channels))
            && sampleRate == cSampleRate
            && channels == cChannels
            && (!hasBitsPerSample || bitsPerSample == cBitPerSample);

        if (matches)
        {
            // Found the codec: transfer our reference to pMatch.
            pMatch = pCandidate;
            pCandidate = nullptr;
        }

        // Drop the reference on rejected candidates (no-op after a match).
        SafeRelease(&pCandidate);
    }

    SafeRelease(&pAvailableTypes);

    if (FAILED(hr))
    {
        SafeRelease(&pMatch);
        return hr;
    }
    if (pMatch == nullptr)
    {
        return MF_E_NOT_FOUND;
    }

    // Hand the reference we hold over to the caller.
    *ppType = pMatch;
    return S_OK;
}

void TV_AudioEncoderMF::decode()
{
    HRESULT hr = S_OK;

    // Initialize com interface
    CoInitializeEx(0, COINIT_MULTITHREADED);

    // Start media foundation
    MFStartup(MF_VERSION);

    IMFMediaType *pInputType = NULL;
    IMFSourceReader *pSourceReader = NULL;
    IMFMediaType *pOuputMediaType = NULL;
    IMFSinkWriter *pSinkWriter = NULL;

    // Create source reader
    hr = MFCreateSourceReaderFromURL(
        L"D:\\buffer\\del\\out\\test.wav",
        NULL,
        &pSourceReader
    );

    // Create sink writer
    hr = MFCreateSinkWriterFromURL(
        L"D:\\buffer\\del\\out\\test_out.mp3",
        NULL,
        NULL,
        &pSinkWriter
    );

    // Get media type from source reader
    hr = pSourceReader->GetCurrentMediaType(
        MF_SOURCE_READER_FIRST_AUDIO_STREAM,
        &pInputType
    );

    // Get sample rate, bit rate and channels
    UINT32 sampleRate = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_SAMPLES_PER_SECOND,
        &sampleRate
    );

    UINT32 bitRate = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_BITS_PER_SAMPLE,
        &bitRate
    );

    UINT32 channels = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_NUM_CHANNELS,
        &channels
    );

    // Try to find a media type that is fitting.
    hr = GetOutputMediaTypes(
        MFAudioFormat_MP3,
        sampleRate,
        bitRate,
        channels,
        &pOuputMediaType);

    DWORD dwWriterStreamIndex = -1;

    // Add the stream
    hr = pSinkWriter->AddStream(
        pOuputMediaType,
        &dwWriterStreamIndex
    );

    // Set input media type
    hr = pSinkWriter->SetInputMediaType(
        dwWriterStreamIndex,
        pInputType,
        NULL
    );

    // Tell the sink writer to accept data
    hr = pSinkWriter->BeginWriting();

    // Forever alone loop
    while (true)
    {
        DWORD nStreamIndex, nStreamFlags;
        LONGLONG nTime;
        IMFSample *pSample;

        // Read through the samples until...
        hr = pSourceReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
            0,
            &nStreamIndex,
            &nStreamFlags,
            &nTime,
            &pSample);

        if (pSample)
        {
            hr = pSinkWriter->WriteSample(
                dwWriterStreamIndex,
                pSample
            );
        }

        // ... we are at the end of the stream...
        if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // ... and jump out.
            break;
        }
    }

    // Call finalize to finish writing.
    hr = pSinkWriter->Finalize();
    // Done :D
}

The problem is that there is a big difference in audio quality: when I play back the .wav file (with the standard Windows players) it sounds good, but the compressed .mp3 file sounds like a voice recorded on a very low-quality recorder.

What could be the problem here? I don't see any way to set the output quality, something like setOutQualityInPercent(100).

EDIT

// Second attempt at the WAV -> MP3 transcode. It follows the same steps as
// the first version: create a source reader and a sink writer for the
// hard-coded paths, read the input's audio attributes, pick an output type,
// then copy samples until the end of the stream. The only difference is the
// two marked lines that modify the output type after it has been selected.
//
// NOTE(review): no HRESULT is checked and nothing acquired here (COM, Media
// Foundation, interfaces, samples) is released.
void co_AudioEncoderMF::decode()
{
    HRESULT hr = S_OK;

    // Initialize com interface
    CoInitializeEx(0, COINIT_MULTITHREADED);

    // Start media foundation
    MFStartup(MF_VERSION);

    IMFMediaType *pInputType = NULL;
    IMFSourceReader *pSourceReader = NULL;
    IMFMediaType *pOuputMediaType = NULL;
    IMFSinkWriter *pSinkWriter = NULL;

    // Create source reader
    hr = MFCreateSourceReaderFromURL(
        L"D:\\buffer\\del\\out\\test.wav",
        NULL,
        &pSourceReader
    );

    // Create sink writer
    hr = MFCreateSinkWriterFromURL(
        L"D:\\buffer\\del\\out\\test_out.mp3",
        NULL,
        NULL,
        &pSinkWriter
    );

    // Get media type from source reader
    hr = pSourceReader->GetCurrentMediaType(
        MF_SOURCE_READER_FIRST_AUDIO_STREAM,
        &pInputType
    );

    // Get sample rate, bit rate and channels
    UINT32 sampleRate = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_SAMPLES_PER_SECOND,
        &sampleRate
    );

    // Despite its name, this variable receives the input's bits per sample.
    UINT32 bitRate = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_BITS_PER_SAMPLE,
        &bitRate
    );

    UINT32 channels = NULL;
    hr = pInputType->GetUINT32(
        MF_MT_AUDIO_NUM_CHANNELS,
        &channels
    );

    // Try to find a media type that is fitting.
    hr = GetOutputMediaTypes(
        MFAudioFormat_MP3,
        sampleRate,
        bitRate,
        channels,
        &pOuputMediaType);

    // The asker's experiment: raise the bits-per-sample value by 2 and write
    // it onto the selected output type. The trailing arrows are annotations
    // from the post, not C++.
    bitRate = bitRate + 2;   <------- This line 
    pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line 

    // DWORD is unsigned, so -1 wraps to the maximum value; AddStream below
    // receives the address and is expected to overwrite it.
    DWORD dwWriterStreamIndex = -1;

    // Add the stream
    hr = pSinkWriter->AddStream(
        pOuputMediaType,
        &dwWriterStreamIndex
    );

    // Set input media type
    hr = pSinkWriter->SetInputMediaType(
        dwWriterStreamIndex,
        pInputType,
        NULL
    );

    // Tell the sink writer to accept data
    hr = pSinkWriter->BeginWriting();

    // Forever alone loop
    while (true)
    {
        // NOTE(review): these locals are uninitialized; if ReadSample fails
        // without writing them, the checks below read indeterminate values.
        DWORD nStreamIndex, nStreamFlags;
        LONGLONG nTime;
        IMFSample *pSample;

        // Read through the samples until...
        hr = pSourceReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
            0,
            &nStreamIndex,
            &nStreamFlags,
            &nTime,
            &pSample);

        // Forward the sample to the writer; pSample is not released here.
        if (pSample)
        {
            hr = pSinkWriter->WriteSample(
                dwWriterStreamIndex,
                pSample
            );
        }

        // ... we are at the end of the stream...
        if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // ... and jump out.
            break;
        }
    }

    // Call finalize to finish writing.
    hr = pSinkWriter->Finalize();
    // Done :D
}

EDIT2

There are 2 files - https://drive.google.com/drive/folders/1yzB2u0TvMSnwsTpYnDDPFBDkTB75ZFwM?usp=sharing

The folder contains the encoded result and the original file.


Solution

  • This part is just broken:

        // Try to find a media type that is fitting.
        hr = GetOutputMediaTypes(
            MFAudioFormat_MP3,
            sampleRate,
            bitRate,
            channels,
            &pOuputMediaType);
    
        bitRate = bitRate + 2;   <------- This line 
        pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line 
    

    To get you back on track, replace the fragment above with:

        // Build the MP3 output type by hand instead of searching for one.
        // NOTE(review): the HRESULTs of these calls are not checked.
        MFCreateMediaType(&pOuputMediaType);
        pOuputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
        pOuputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_MP3);
        // 128000 bits per second / 8 = 16000 bytes per second (128 kbit/s).
        pOuputMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 128000 / 8);
        // Channel count and sample rate are carried over from the input.
        pOuputMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, channels);
        pOuputMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sampleRate);
    

    and you'll start getting proper MP3.

    Note that the attributes above are taken directly from documentation: MP3 Audio Encoder. In your application you will need to make sure that target values remain valid and match the documented options. You might need to resample audio, for example.