Search code examples
ms-media-foundation

Capturing Audio In Media Foundation - ReadSample Never Returns


I am trying to capture audio from my microphone in Media Foundation. The issue I am having is that when I go to ReadSample, the function never returns. All my HRESULT checks pass. This happens for any device I try. I tried changing the MFT SUBTYPE to other formats as well. Without any actual errors, I'm not exactly sure what I should be looking for.

main.cpp

#include "../../lib/audio_utils.h"
#include "../CaptureAndDisplayVideoDevice/AudioDeviceCapturer.h"

static HRESULT stub_function()
{
    HRESULT hr = S_OK;

    std::vector<audio_device_info> audio_devices_information;
    if (FAILED(hr = get_audio_devices_info(audio_devices_information)))
        return hr;

    IMFMediaSource* media_source;
    if(FAILED(hr = get_device_source_from_audio_device_info(audio_devices_information[0], &media_source)))
        return hr;

    AudioDeviceCapturer* adc = new AudioDeviceCapturer(media_source);

    DWORD stream;
    DWORD flags;
    LONGLONG timestamp;

    while (true)
    {
        CComPtr<IMFSample> sample = nullptr;
        // this is reading in syncronous blocking mode, MF supports also async calls
        hr = adc->m_source_reader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, &stream, &flags, &timestamp, &sample);

        if (flags & MF_SOURCE_READERF_STREAMTICK)
            continue;

        CComPtr<IMFMediaBuffer> buffer;
    }
}

int main()
{
    stub_function();
}

AudioDeviceCapturer.h

#pragma once

#define COBJMACROS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>

#include <stdio.h>
#include <intrin.h>

#include <atlbase.h>

// std
#include <iostream>
#include <string>

#pragma comment (lib, "ole32.lib")
#pragma comment (lib, "mf.lib")
#pragma comment (lib, "mfplat.lib")
#pragma comment (lib, "mfuuid.lib")
#pragma comment (lib, "mfreadwrite.lib")

class AudioDeviceCapturer
{
public:
    IMFMediaSource* m_device_source;
    IMFSourceReader* m_source_reader;

    AudioDeviceCapturer(IMFMediaSource* device_source) : m_device_source(device_source)
    {
        HRESULT hr = S_OK;

        ATLENSURE_SUCCEEDED(MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET));

        //
        {
            CComPtr<IMFAttributes> encoder_attributes;

            MFCreateAttributes(&encoder_attributes, 2);

            hr = MFCreateSourceReaderFromMediaSource(m_device_source, encoder_attributes, &m_source_reader);
        }

        CComPtr<IMFMediaType> type;

        ATLENSURE_SUCCEEDED(MFCreateMediaType(&type));

        ATLENSURE_SUCCEEDED(type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));

        ATLENSURE_SUCCEEDED(type->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
    }
private:
};

audio_utils.h

#pragma once

#include <Windows.h>
#include <cstdint>
#include <tuple>
#include <assert.h>
#include <mfapi.h>
#include <mfidl.h>
#include <atlbase.h>
#include <iostream>
#include <vector>

// libs
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "evr.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "Winmm.lib")

struct audio_device_info
{
    wchar_t name[255];
    wchar_t endpoint_id[255];
};

HRESULT get_audio_devices_info(std::vector<audio_device_info>& audio_devices_information)
{
    HRESULT hr;

    hr = MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET);

    uint32_t count;
    IMFActivate** devices;

    CComPtr<IMFAttributes> attr;

    hr = MFCreateAttributes(&attr, 1);

    hr = attr->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID);

    hr = MFEnumDeviceSources(attr, &devices, &count);

    printf("Detected %u devices:\n", count);

    for (uint32_t i = 0; i < count; i++)
    {
        audio_device_info audio_device {};

        uint32_t length;
        LPWSTR name;
        LPWSTR endpoint_id;

        hr = devices[i]->GetAllocatedString(MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, &name, &length);

        hr = devices[i]->GetAllocatedString(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_ENDPOINT_ID, &endpoint_id, &length);

        wcscpy_s(audio_device.name, name);
        wcscpy_s(audio_device.endpoint_id, endpoint_id);


        {
            IMFMediaSource* video_device_source;
            CComPtr<IMFAttributes> attr;
            ATLENSURE_SUCCEEDED(MFCreateAttributes(&attr, 2));
            ATLENSURE_SUCCEEDED(attr->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID));
            ATLENSURE_SUCCEEDED(attr->SetString(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_ENDPOINT_ID, endpoint_id));
            ATLENSURE_SUCCEEDED(MFCreateDeviceSource(attr, &video_device_source));

            IMFPresentationDescriptor* pPD = nullptr;
            IMFStreamDescriptor* pSD = nullptr;
            IMFMediaTypeHandler* pHandler = nullptr;
            IMFMediaType* pType = nullptr;

            ATLENSURE_SUCCEEDED(video_device_source->CreatePresentationDescriptor(&pPD));

            BOOL fSelected;
            ATLENSURE_SUCCEEDED(pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD));

            ATLENSURE_SUCCEEDED(pSD->GetMediaTypeHandler(&pHandler));

            DWORD cTypes = 0;
            ATLENSURE_SUCCEEDED(pHandler->GetMediaTypeCount(&cTypes));

            for (DWORD i = 0; i < cTypes; i++)
            {
                ATLENSURE_SUCCEEDED(pHandler->GetMediaTypeByIndex(i, &pType));

                GUID Subtype;
                hr = pType->GetGUID(MF_MT_SUBTYPE, &Subtype);
                wchar_t SubtypeText[64];
                StringFromGUID2(Subtype, SubtypeText, static_cast<int>(std::size(SubtypeText)));
            }
        }

        audio_devices_information.push_back(audio_device);

        printf("%S = %S\n", name, endpoint_id);

        CoTaskMemFree(name);
        CoTaskMemFree(endpoint_id);

        devices[i]->Release();
    }

    CoTaskMemFree(devices);

    MFShutdown();

    return hr;
}

HRESULT get_device_source_from_audio_device_info(audio_device_info device_info, IMFMediaSource** video_device_source)
{
    HRESULT hr = S_OK;

    ATLENSURE_SUCCEEDED(MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET));

    CComPtr<IMFAttributes> attr;
    ATLENSURE_SUCCEEDED(MFCreateAttributes(&attr, 2));

    ATLENSURE_SUCCEEDED(attr->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID));

    ATLENSURE_SUCCEEDED(attr->SetString(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_ENDPOINT_ID, device_info.endpoint_id));

    ATLENSURE_SUCCEEDED(MFCreateDeviceSource(attr, video_device_source));

    MFShutdown();

    return hr;
}


#ifndef DEBUG // works in VS
#define DebugPrint(x) 
#else
#define DebugPrint(x) do { std::cout << x << std::endl; } while (0)
#endif

Solution

  • MFStartup and MFShutdown should only be called once for the duration of your presented code, for example, remove them everywhere and change main like this:

    int main()
    {
        MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET);
        stub_function();
        MFShutdown();
    }