Search code examples
c audio esp32 dma i2s

How to set audio sampling rate using ADC/DAC Mode of ESP32 I2S driver?


I have combined some of the i2s-adc and sd-card examples from the esp-idf repo on GitHub to make a program that samples an analog microphone and stores the recording as a WAV file on an SD card. The problem is that my recorded audio always plays back at half the frequency it was set to be sampled at.

As shown in the code below, I2S_SAMPLE_RATE is set to 44100 Hz, but when I play the file back it sounds as if it had been sampled at around 88200 Hz. I can think of two reasons for that:

- The I2S driver is working at a higher sampling rate.

I have already tried hard coding the frequency with i2s_set_clk but had no luck.

- The recording was made with a low bit depth, so there's some distortion going on.

I am aware the esp32 has a 12-bit ADC and I am setting the BIT_SAMPLE as 16, but is that enough to cause the type of distortion that I described?

#include <stdio.h>
#include <string.h>
#include <math.h>
#include <sys/unistd.h>
#include <sys/stat.h>
#include "esp_log.h"
#include "esp_err.h"
#include "esp_system.h"
#include "esp_vfs_fat.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "driver/i2s.h"
#include "driver/gpio.h"
#include "driver/spi_common.h"
#include "sdmmc_cmd.h"
#include "sdkconfig.h"
#include "esp_adc_cal.h"
#include "driver/adc.h"

#define V_REF 1100 // NOTE(review): presumably the ADC reference voltage in mV; not used by the visible code

#define I2S_COMM_MODE 0          // ADC/DAC Mode; also passed as the I2S port number when installing/uninstalling the driver
#define I2S_SAMPLE_RATE 44100    // Requested sample rate in Hz; also written into the WAV header
#define I2S_SAMPLE_BITS 16       // Width of each I2S sample word in bits
#define I2S_BUF_DEBUG 0          // enable display buffer for debug
#define I2S_READ_LEN (16 * 1024) // I2S read buffer length (parenthesised so it expands safely inside expressions)
// Mono capture. With I2S_CHANNEL_FMT_RIGHT_LEFT the driver runs two slots per frame,
// so the built-in ADC delivers twice as many samples per second as I2S_SAMPLE_RATE
// while the WAV header still advertises I2S_SAMPLE_RATE: the file plays at half speed.
#define I2S_FORMAT (I2S_CHANNEL_FMT_ONLY_LEFT)
#define I2S_CHANNEL_NUM 0              // I2S channel number
#define I2S_ADC_UNIT ADC_UNIT_1        // I2S built-in ADC unit
#define I2S_ADC_CHANNEL ADC1_CHANNEL_0 // I2S built-in ADC channel GPIO36

// GPIOs wired to the SD card in SPI mode.
#define SPI_MOSI_GPIO 23
#define SPI_MISO_GPIO 19
#define SPI_CLK_GPIO 18
#define SPI_CS_GPIO 5

// Tag prefixed to every log line emitted by this file.
static const char *TAG = "I2S_ADC_REC";

#define BIT_SAMPLE 16 // Bits per sample stored in the WAV file

#define SPI_DMA_CHAN SPI_DMA_CH_AUTO
#define NUM_CHANNELS 1 // For mono recording only!
#define SD_MOUNT_POINT "/sdcard"
#define SAMPLE_SIZE (BIT_SAMPLE * 1024)
// Bytes of audio per second. The whole expression is parenthesised so that
// uses such as `x / BYTE_RATE` group as intended.
#define BYTE_RATE (I2S_SAMPLE_RATE * (BIT_SAMPLE / 8) * NUM_CHANNELS)

// When testing SD and SPI modes, keep in mind that once the card has been
// initialized in SPI mode, it can not be reinitialized in SD mode without
// toggling power to the card.
// Host descriptor initialised from the SDSPI defaults; its `slot` field is used
// as the SPI bus id when the bus is initialised and freed.
sdmmc_host_t host = SDSPI_HOST_DEFAULT();
// Card handle filled in by the mount call and later handed to the unmount call.
sdmmc_card_t *card;

// Capture buffer filled by i2s_read() and written to the WAV file.
// NOTE(review): it holds SAMPLE_SIZE 16-bit elements (2 * SAMPLE_SIZE bytes) while
// record_wav() requests SAMPLE_SIZE bytes per read, so only half of it appears
// to be used — confirm whether the element count should be halved.
static int16_t i2s_readraw_buff[SAMPLE_SIZE];
// Byte count reported by the most recent i2s_read() call.
size_t bytes_read;
// Size in bytes of the WAV header written at the start of the file.
const int WAVE_HEADER_SIZE = 44;

/**
 * @brief Bring up the SPI bus and mount the SD card's FAT filesystem at SD_MOUNT_POINT.
 *
 * The SDSPI slot is set up without card detect (CD) and write protect (WP) signals;
 * only the chip-select GPIO and the host id are overridden. The mount is requested
 * with format_if_mount_failed enabled. On success the card properties are printed to
 * stdout; on any failure an error is logged and the function simply returns.
 */
void mount_sdcard(void)
{
    // Filesystem mount options: format on mount failure, at most 5 open files,
    // 8 KiB allocation units.
    esp_vfs_fat_sdmmc_mount_config_t fat_options = {
        .allocation_unit_size = 8 * 1024,
        .max_files = 5,
        .format_if_mount_failed = true};
    ESP_LOGI(TAG, "Initializing SD card");

    // Plain 4-wire SPI: the quad-mode WP/HD lines are unused.
    spi_bus_config_t spi_pins = {
        .sclk_io_num = SPI_CLK_GPIO,
        .miso_io_num = SPI_MISO_GPIO,
        .mosi_io_num = SPI_MOSI_GPIO,
        .quadhd_io_num = -1,
        .quadwp_io_num = -1,
        .max_transfer_sz = 4000,
    };
    if (spi_bus_initialize(host.slot, &spi_pins, SPI_DMA_CHAN) != ESP_OK)
    {
        ESP_LOGE(TAG, "Failed to initialize bus.");
        return;
    }

    // Modify sd_device.gpio_cd and sd_device.gpio_wp if your board has these signals.
    sdspi_device_config_t sd_device = SDSPI_DEVICE_CONFIG_DEFAULT();
    sd_device.host_id = host.slot;
    sd_device.gpio_cs = SPI_CS_GPIO;

    const esp_err_t mount_status =
        esp_vfs_fat_sdspi_mount(SD_MOUNT_POINT, &host, &sd_device, &fat_options, &card);

    if (mount_status == ESP_OK)
    {
        // Card has been initialized, print its properties
        sdmmc_card_print_info(stdout, card);
        return;
    }

    if (mount_status != ESP_FAIL)
    {
        ESP_LOGE(TAG, "Failed to initialize the card (%s). "
                      "Make sure SD card lines have pull-up resistors in place.",
                 esp_err_to_name(mount_status));
    }
    else
    {
        ESP_LOGE(TAG, "Failed to mount filesystem.");
    }
}

/** @brief Store a 16-bit value at @p dst in little-endian byte order. */
static void wav_store_le16(uint8_t *dst, uint16_t value)
{
    dst[0] = (uint8_t)(value & 0xFFu);
    dst[1] = (uint8_t)((value >> 8) & 0xFFu);
}

/** @brief Store a 32-bit value at @p dst in little-endian byte order. */
static void wav_store_le32(uint8_t *dst, uint32_t value)
{
    dst[0] = (uint8_t)(value & 0xFFu);
    dst[1] = (uint8_t)((value >> 8) & 0xFFu);
    dst[2] = (uint8_t)((value >> 16) & 0xFFu);
    dst[3] = (uint8_t)((value >> 24) & 0xFFu);
}

/**
 * @brief Generates the header for the WAV file that is going to be stored in the SD card.
 * See this for reference: http://soundfile.sapp.org/doc/WaveFormat/.
 *
 * The header describes mono, 16-bit PCM audio and is exactly 44 bytes long.
 * ByteRate and BlockAlign are derived from @p sample_rate and the fixed channel
 * count / sample width, so the header stays self-consistent for any sample rate
 * (previously ByteRate was taken from a global constant and ignored the parameter).
 *
 * @param wav_header  Destination buffer of at least 44 bytes. A NULL pointer is ignored.
 * @param wav_size    Size of the audio payload in bytes (Subchunk2Size).
 * @param sample_rate Sampling frequency in Hz.
 */
void generate_wav_header(char *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
    enum
    {
        HEADER_BYTES = 44,   // RIFF descriptor (12) + "fmt " chunk (24) + "data" chunk header (8)
        FMT_CHUNK_BYTES = 16, // Subchunk1Size for PCM
        FORMAT_PCM = 1,
        CHANNEL_COUNT = 1,
        BITS_PER_SAMPLE = 16,
        BLOCK_ALIGN = CHANNEL_COUNT * (BITS_PER_SAMPLE / 8)
    };

    if (wav_header == NULL)
    {
        return;
    }

    // ChunkSize counts everything after the 8-byte "RIFF" + size prefix.
    const uint32_t riff_size = wav_size + (uint32_t)HEADER_BYTES - 8u;
    const uint32_t byte_rate = sample_rate * (uint32_t)BLOCK_ALIGN;

    // Assemble in an unsigned scratch buffer so no value is narrowed into a
    // possibly-signed char, then copy the finished header out in one go.
    uint8_t header[HEADER_BYTES];

    memcpy(&header[0], "RIFF", 4);                 // ChunkID
    wav_store_le32(&header[4], riff_size);         // ChunkSize
    memcpy(&header[8], "WAVE", 4);                 // Format
    memcpy(&header[12], "fmt ", 4);                // Subchunk1ID
    wav_store_le32(&header[16], FMT_CHUNK_BYTES);  // Subchunk1Size (16 for PCM)
    wav_store_le16(&header[20], FORMAT_PCM);       // AudioFormat (1 for PCM)
    wav_store_le16(&header[22], CHANNEL_COUNT);    // NumChannels (1 channel)
    wav_store_le32(&header[24], sample_rate);      // SampleRate
    wav_store_le32(&header[28], byte_rate);        // ByteRate
    wav_store_le16(&header[32], BLOCK_ALIGN);      // BlockAlign
    wav_store_le16(&header[34], BITS_PER_SAMPLE);  // BitsPerSample (16 bits)
    memcpy(&header[36], "data", 4);                // Subchunk2ID
    wav_store_le32(&header[40], wav_size);         // Subchunk2Size

    memcpy(wav_header, header, sizeof header);
}

/**
 * @brief Install the I2S driver in built-in ADC/DAC mode and attach the ADC input.
 *
 * The driver is installed on port I2S_COMM_MODE as a master with RX and TX enabled,
 * using the sample rate, sample width and channel format from the I2S_* macros,
 * six DMA buffers of length 256 and the APLL clock source. No event queue is requested.
 * Afterwards I2S_ADC_CHANNEL of I2S_ADC_UNIT is selected as the ADC source.
 * Both driver calls are wrapped in ESP_ERROR_CHECK.
 */
void init_microphone(void)
{
    const i2s_config_t adc_dac_cfg = {
        .mode = I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX | I2S_MODE_DAC_BUILT_IN | I2S_MODE_ADC_BUILT_IN,
        .sample_rate = I2S_SAMPLE_RATE,
        .bits_per_sample = I2S_SAMPLE_BITS,
        .channel_format = I2S_FORMAT,
        .communication_format = I2S_COMM_FORMAT_STAND_MSB,
        .dma_buf_count = 6,
        .dma_buf_len = 256,
        .intr_alloc_flags = 0,
        .use_apll = 1,
    };

    // Install the driver first, then route the ADC pad to it.
    ESP_ERROR_CHECK(i2s_driver_install(I2S_COMM_MODE, &adc_dac_cfg, 0, NULL));
    ESP_ERROR_CHECK(i2s_set_adc_mode(I2S_ADC_UNIT, I2S_ADC_CHANNEL));
}

void record_wav(uint32_t rec_time)
{
    // Use POSIX and C standard library functions to work with files.
    int flash_wr_size = 0;
    ESP_LOGI(TAG, "Opening file");

    char wav_header_fmt[WAVE_HEADER_SIZE];

    uint32_t flash_rec_size = BYTE_RATE * rec_time;
    generate_wav_header(wav_header_fmt, flash_rec_size, I2S_SAMPLE_RATE);

    // First check if file exists before creating a new file.
    struct stat st;
    if (stat(SD_MOUNT_POINT "/record.wav", &st) == 0)
    {
        // Delete it if it exists
        unlink(SD_MOUNT_POINT "/record.wav");
    }

    // Create new WAV file
    FILE *f = fopen(SD_MOUNT_POINT "/record.wav", "a");
    if (f == NULL)
    {
        ESP_LOGE(TAG, "Failed to open file for writing");
        return;
    }

    // Write the header to the WAV file
    fwrite(wav_header_fmt, 1, WAVE_HEADER_SIZE, f);

    i2s_adc_enable(I2S_CHANNEL_NUM);
    // Start recording
    while (flash_wr_size < flash_rec_size)
    {
        // TODO: The part below needs to be replaced with analog microphone reading //

        // Read the RAW samples from the microphone
        // Read data from I2S bus, in this case, from ADC. //
        i2s_read(I2S_CHANNEL_NUM, (char *)i2s_readraw_buff, SAMPLE_SIZE, &bytes_read, 100);

        // TODO: The part above needs to be replaced with analog microphone reading //

        // Write the samples to the WAV file
        fwrite(i2s_readraw_buff, 1, bytes_read, f);
        flash_wr_size += bytes_read;
    }

    ESP_LOGI(TAG, "Recording done!");
    fclose(f);
    ESP_LOGI(TAG, "File written on SDCard");

    // All done, unmount partition and disable SPI peripheral
    esp_vfs_fat_sdcard_unmount(SD_MOUNT_POINT, card);
    ESP_LOGI(TAG, "Card unmounted");
    // Deinitialize the bus after all devices are removed
    spi_bus_free(host.slot);
}

/**
 * @brief Application entry point: mount the SD card, set up the I2S ADC, record
 * a fixed-length WAV file and finally uninstall the I2S driver.
 */
void app_main(void)
{
    enum
    {
        RECORD_SECONDS = 5 // length of the recording
    };

    ESP_LOGI(TAG, "Analog microphone recording Example start");

    mount_sdcard();    // storage for the audio file
    init_microphone(); // I2S in built-in ADC mode

    ESP_LOGI(TAG, "Starting recording for %d seconds!", RECORD_SECONDS);
    record_wav(RECORD_SECONDS);

    // Recording finished: tear the I2S driver down again.
    ESP_ERROR_CHECK(i2s_driver_uninstall(I2S_COMM_MODE));
}

Let me know what you think the problem is or/and if you have good recommendations for learning material about this subject.


Solution

  • A recent "brush" with I2S had made me aware of its range of capabilities.

    While scanning your code and thinking about your description ("seems to be sampled at 88200Hz"), the token I2S_CHANNEL_FMT_RIGHT_LEFT caught my eye.

    Knowing you'd be recording a mono signal (and without delving deeper into I2S) it seemed reasonable to suggest setting the configuration to one channel only; ie. I2S_CHANNEL_FMT_ONLY_LEFT (or ...RIGHT). (Were you to view a 'hex dump' of a WAV recording previously, my guess is that you'd have seen what @pmacfarlane pointed to: half the sample values are either duplicates or 0 and can/must be tossed.)

    It makes sense that such a capable system (I2S) would also understand, as you've found, I2S_CHANNEL_MONO. I'm glad you found a solution.

    As to 'seems weird', it's only when the apprentice learns deeply from their masters that "weird" ripens into comprehension and appreciation. Those who've gone before were no 'slouches'.

    OT: as you experiment with sampling and writing to an SD card (micro SD?), be aware that writing can have a significant 'overhead'. The microseconds taken to write 10K are far more than 1/10th of those taken to write 100K. I suspect the 'set-up' before the writing occurs is substantial. Better to write big blocks, not small fragments that may cause you to miss other events (like the I2S filling its DMA buffer).

    Happy sampling!