esp-adf 使用pipeline_sonic例程时，SD卡中保存的录音不正确 (AUD-6449)

Environment

Audio development kit: ESP32-S3-Korvo-2
Audio kit version (for ESP32-S3-Korvo-2): v3
[Required] Module or chip used: ESP32-S3-WROOM-1
[Required] IDF version : v5.4.1-4c2820d377
[Required] ADF version : v2.7-106-g40abfc5b
Build system: [Make|CMake|idf.py]
[Required] Running log: All logs from power-on to problem recurrence
Compiler version: xtensa-esp-elf-gcc.exe (crosstool-NG esp-14.2.0_20241119) 14.2.0
Operating system: [Windows]
(Windows only) Environment type: [PowerShell]
Using an IDE?: [Yes (please give details)] VSCode + ESP-IDF
Power supply: [USB]

Problem Description

使用 ESP-ADF 中的 pipeline_sonic 例程时，按下录音按键后保存到 SD 卡的 .wav 录音文件播放异常（将 SD 卡插入 Windows 电脑播放），声音听起来像经过了变声处理；但根据例程说明，录音文件在保存时并不会经过变声处理。

Expected Behavior

Actual Behavior

Steps to Reproduce

step1
...

// If possible, attach a picture of your setup/wiring here.

Code to Reproduce This Issue

// the code should be wrapped in the ```cpp tag so that it will be displayed better.
/* Voice Changer Example with Sonic

   This example code is in the Public Domain (or CC0 licensed, at your option.)

   Unless required by applicable law or agreed to in writing, this
   software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
   CONDITIONS OF ANY KIND, either express or implied.
*/

#include <string.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/timers.h"
#include "esp_log.h"
#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_mem.h"
#include "audio_common.h"
#include "fatfs_stream.h"
#include "i2s_stream.h"
#include "wav_encoder.h"
#include "wav_decoder.h"
#include "board.h"
#include "audio_sonic.h"
#include "esp_peripherals.h"
#include "periph_sdcard.h"
#include "periph_button.h"

/* Log tag passed to the ESP_LOGx macros throughout this file. */
static const char *TAG = "SONIC_EXAMPLE";
/* Peripheral set handle: assigned in app_main() and read by record_playback_task(). */
static esp_periph_set_handle_t set;

/* Audio format handed to every create_*_stream() call and to i2s_stream_set_clk(). */
#define SAMPLE_RATE         16000
#define CHANNEL             1
#define BITS                16

/* Sonic pitch/speed factors; in this listing they are referenced only from commented-out code. */
#define SONIC_PITCH         1.4f
#define SONIC_SPEED         2.0f

/**
 * Build a sonic element configured with the example's sample rate and
 * channel count, with linear-interpolation resampling switched on.
 *
 * @return the handle produced by sonic_init() for that configuration
 */
static audio_element_handle_t create_sonic()
{
    sonic_cfg_t cfg = DEFAULT_SONIC_CONFIG();

    cfg.sonic_info.resample_linear_interpolate = 1;
    cfg.sonic_info.channel = CHANNEL;
    cfg.sonic_info.samplerate = SAMPLE_RATE;

    audio_element_handle_t sonic = sonic_init(&cfg);
    return sonic;
}

/**
 * Create a FATFS stream element of the requested direction and store the
 * audio format in its element info.
 *
 * @param sample_rates  value written to the element info's sample_rates field
 * @param bits          value written to the bits field
 * @param channels      value written to the channels field
 * @param type          stream type assigned to the FATFS stream configuration
 * @return handle of the new element (checked with mem_assert)
 */
static audio_element_handle_t create_fatfs_stream(int sample_rates, int bits, int channels, audio_stream_type_t type)
{
    fatfs_stream_cfg_t cfg = FATFS_STREAM_CFG_DEFAULT();
    cfg.type = type;

    audio_element_handle_t el = fatfs_stream_init(&cfg);
    mem_assert(el);

    /* Fetch the current info first, then overwrite only the three format fields. */
    audio_element_info_t info = {0};
    audio_element_getinfo(el, &info);
    info.sample_rates = sample_rates;
    info.channels = channels;
    info.bits = bits;
    audio_element_setinfo(el, &info);

    return el;
}

static audio_element_handle_t create_i2s_stream(int sample_rates, int bits, int channels, audio_stream_type_t type)
{
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT();
    i2s_cfg.type = type;
#if defined CONFIG_ESP_LYRAT_MINI_V1_1_BOARD
    if (i2s_cfg.type == AUDIO_STREAM_READER) {
#if (ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0))
        i2s_cfg.chan_cfg.id = CODEC_ADC_I2S_PORT;
        i2s_cfg.std_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
        i2s_cfg.std_cfg.slot_cfg.slot_mask = I2S_STD_SLOT_LEFT;
#else
        i2s_cfg.i2s_port = CODEC_ADC_I2S_PORT;
        i2s_cfg.i2s_config.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT;
#endif
    }
#endif

    audio_element_handle_t i2s_stream = i2s_stream_init(&i2s_cfg);
    mem_assert(i2s_stream);
    audio_element_set_music_info(i2s_stream, sample_rates, channels, bits);
    return i2s_stream;
}

/** Create a WAV encoder element from the default encoder configuration. */
static audio_element_handle_t create_wav_encoder()
{
    wav_encoder_cfg_t enc_cfg = DEFAULT_WAV_ENCODER_CONFIG();
    audio_element_handle_t encoder = wav_encoder_init(&enc_cfg);
    return encoder;
}

/** Create a WAV decoder element from the default decoder configuration. */
static audio_element_handle_t create_wav_decoder()
{
    wav_decoder_cfg_t dec_cfg = DEFAULT_WAV_DECODER_CONFIG();
    audio_element_handle_t decoder = wav_decoder_init(&dec_cfg);
    return decoder;
}

/**
 * Build the recorder and playback pipelines, then loop on button events:
 * pressing [Rec] stops playback and starts recording to /sdcard/rec.wav,
 * releasing it stops recording and plays that file back.
 *
 * In this listing the sonic element is commented out everywhere, so the
 * playback pipeline is file_reader -> wav_decoder -> i2s_writer.
 *
 * Reads the file-scope peripheral set `set` to receive button events.
 */
void record_playback_task()
{
    audio_pipeline_handle_t pipeline_rec = NULL;
    audio_pipeline_handle_t pipeline_play = NULL;
    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();

    /**
     * For the Recorder:
     * We will setup I2S and get audio at sample rates 16000Hz, 16-bits, 1 channel.
     * And the audio stream will be encoded with Wav encoder.
     * Then the audio stream will be written to SDCARD.
     */
    ESP_LOGI(TAG, "[1.1] Initialize recorder pipeline");
    pipeline_rec = audio_pipeline_init(&pipeline_cfg);
    pipeline_play = audio_pipeline_init(&pipeline_cfg);

    ESP_LOGI(TAG, "[1.2] Create audio elements for recorder pipeline");
    audio_element_handle_t i2s_reader_el = create_i2s_stream(SAMPLE_RATE, BITS, CHANNEL, AUDIO_STREAM_READER);
    audio_element_handle_t wav_encoder_el = create_wav_encoder();
    audio_element_handle_t fatfs_writer_el = create_fatfs_stream(SAMPLE_RATE, BITS, CHANNEL, AUDIO_STREAM_WRITER);

    ESP_LOGI(TAG, "[1.3] Register audio elements to recorder pipeline");
    audio_pipeline_register(pipeline_rec, i2s_reader_el, "i2s_reader");
    audio_pipeline_register(pipeline_rec, wav_encoder_el, "wav_encoder");
    audio_pipeline_register(pipeline_rec, fatfs_writer_el, "file_writer");
    /* Link order defines the data flow: I2S input -> WAV encoder -> file on SD card. */
    const char *link_rec[3] = {"i2s_reader", "wav_encoder", "file_writer"};
    audio_pipeline_link(pipeline_rec, &link_rec[0], 3);

    /**
     * For the Playback:
     * We will read the recorded file, decode it and send it to I2S.
     * The sonic stage of the original example is disabled here (see the
     * commented-out lines), so playback is not pitch/speed processed.
     */
    ESP_LOGI(TAG, "[2.2] Create audio elements for playback pipeline");
    audio_element_handle_t fatfs_reader_el = create_fatfs_stream(SAMPLE_RATE, BITS, CHANNEL, AUDIO_STREAM_READER);
    audio_element_handle_t wav_decoder_el = create_wav_decoder();
    //audio_element_handle_t sonic_el = create_sonic();
    audio_element_handle_t i2s_writer_el = create_i2s_stream(SAMPLE_RATE, BITS, CHANNEL, AUDIO_STREAM_WRITER);

    ESP_LOGI(TAG, "[2.3] Register audio elements to playback pipeline");
    audio_pipeline_register(pipeline_play, fatfs_reader_el, "file_reader");
    audio_pipeline_register(pipeline_play, wav_decoder_el, "wav_decoder");
    //audio_pipeline_register(pipeline_play, sonic_el, "sonic");
    audio_pipeline_register(pipeline_play, i2s_writer_el, "i2s_writer");
    
    /* Disabled: original four-element link that included the sonic stage. */
    // const char *link_play[4] = {"file_reader", "wav_decoder", "sonic", "i2s_writer"};
    // audio_pipeline_link(pipeline_play, &link_play[0], 4);
    const char *link_play[] = {"file_reader", "wav_decoder", "i2s_writer"};
    audio_pipeline_link(pipeline_play, &link_play[0], 3);

    ESP_LOGI(TAG, "[ 3 ] Set up  event listener");
    audio_event_iface_cfg_t evt_cfg = AUDIO_EVENT_IFACE_DEFAULT_CFG();
    audio_event_iface_handle_t evt = audio_event_iface_init(&evt_cfg);
    /* Forward events from the peripheral set (buttons) to this listener. */
    audio_event_iface_set_listener(esp_periph_set_get_event_iface(set), evt);
    ESP_LOGW(TAG, "Press [Rec] to start recording");
    /* Toggled by the mode button; with sonic disabled it only affects the log message below. */
    bool is_modify_speed = true;
    while (1) {
        audio_event_iface_msg_t msg;
        /* Block until the next event arrives. */
        esp_err_t ret = audio_event_iface_listen(evt, &msg, portMAX_DELAY);
        if (ret != ESP_OK) {
            ESP_LOGE(TAG, "[ * ] Event interface error : %d", ret);
            continue;
        }
        /* Mode button: flip between "speed" and "pitch" selection. */
        if ((int)msg.data == get_input_mode_id()) {
            if ((msg.cmd == PERIPH_BUTTON_LONG_PRESSED)
                || (msg.cmd == PERIPH_BUTTON_PRESSED)) {
                is_modify_speed = !is_modify_speed;
                if (is_modify_speed) {
                    ESP_LOGI(TAG, "The speed of audio file is changed");
                } else {
                    ESP_LOGI(TAG, "The pitch of audio file is changed");
                }
            }
            continue;
        }
        /* Rec button: press starts recording, release starts playback. */
        if ((int)msg.data == get_input_rec_id()) {
            if (msg.cmd == PERIPH_BUTTON_PRESSED) {
                // Using LOGE to make the log color different
                ESP_LOGE(TAG, "Now recording, release [Rec] to STOP");
                /* Stop and reset the playback pipeline before recording starts. */
                audio_pipeline_stop(pipeline_play);
                audio_pipeline_wait_for_stop(pipeline_play);
                audio_pipeline_terminate(pipeline_play);
                audio_pipeline_reset_ringbuffer(pipeline_play);
                audio_pipeline_reset_elements(pipeline_play);

                /**
                 * Audio Recording Flow:
                 * [codec_chip]-->i2s_stream--->wav_encoder-->fatfs_stream-->[sdcard]
                 */
                ESP_LOGI(TAG, "Setup file path to save recorded audio");
                i2s_stream_set_clk(i2s_reader_el, SAMPLE_RATE, BITS, CHANNEL);
                audio_element_set_uri(fatfs_writer_el, "/sdcard/rec.wav");
                audio_pipeline_run(pipeline_rec);
            } else if (msg.cmd == PERIPH_BUTTON_RELEASE || msg.cmd == PERIPH_BUTTON_LONG_RELEASE) {
                ESP_LOGI(TAG, "START Playback");
                /* Stop and reset the recorder pipeline before playback starts. */
                audio_pipeline_stop(pipeline_rec);
                audio_pipeline_wait_for_stop(pipeline_rec);
                audio_pipeline_terminate(pipeline_rec);
                audio_pipeline_reset_ringbuffer(pipeline_rec);
                audio_pipeline_reset_elements(pipeline_rec);

                /**
                 * Audio Playback Flow (sonic stage disabled in this listing):
                 * [sdcard]-->fatfs_stream-->wav_decoder-->i2s_stream-->[codec_chip]
                 */
                ESP_LOGI(TAG, "Setup file path to read the wav audio to play");
                i2s_stream_set_clk(i2s_writer_el, SAMPLE_RATE, BITS, CHANNEL);
                audio_element_set_uri(fatfs_reader_el, "/sdcard/rec.wav");
                /* Disabled: sonic pitch/speed selection based on is_modify_speed. */
                // if (is_modify_speed) {
                //     sonic_set_pitch_and_speed_info(sonic_el, 1.0f, SONIC_SPEED);
                // } else {
                //     sonic_set_pitch_and_speed_info(sonic_el, SONIC_PITCH, 1.0f);
                // }
                audio_pipeline_run(pipeline_play);
            }
        }
    }

    /* NOTE(review): the loop above contains no break or return, so the teardown below is not reached as written. */
    ESP_LOGI(TAG, "[ 4 ] Stop audio_pipeline");
    audio_pipeline_stop(pipeline_rec);
    audio_pipeline_wait_for_stop(pipeline_rec);
    audio_pipeline_terminate(pipeline_rec);
    audio_pipeline_stop(pipeline_play);
    audio_pipeline_wait_for_stop(pipeline_play);
    audio_pipeline_terminate(pipeline_play);

    audio_pipeline_unregister(pipeline_play, fatfs_reader_el);
    audio_pipeline_unregister(pipeline_play, wav_decoder_el);
    audio_pipeline_unregister(pipeline_play, i2s_writer_el);

    audio_pipeline_unregister(pipeline_rec, i2s_reader_el);
    // audio_pipeline_unregister(pipeline_rec, sonic_el);
    audio_pipeline_unregister(pipeline_rec, wav_encoder_el);
    audio_pipeline_unregister(pipeline_rec, fatfs_writer_el);

    /* Terminate the pipeline before removing the listener */
    audio_pipeline_remove_listener(pipeline_rec);
    audio_pipeline_remove_listener(pipeline_play);

    /* Stop all peripherals before removing the listener */
    esp_periph_set_stop_all(set);
    audio_event_iface_remove_listener(esp_periph_set_get_event_iface(set), evt);

    /* Make sure audio_pipeline_remove_listener & audio_event_iface_remove_listener are called before destroying event_iface */
    audio_event_iface_destroy(evt);

    /* Release all resources */
    audio_pipeline_deinit(pipeline_rec);
    audio_pipeline_deinit(pipeline_play);

    audio_element_deinit(fatfs_reader_el);
    audio_element_deinit(wav_decoder_el);
    audio_element_deinit(i2s_writer_el);

    audio_element_deinit(i2s_reader_el);
    // audio_element_deinit(sonic_el);
    audio_element_deinit(wav_encoder_el);
    audio_element_deinit(fatfs_writer_el);
}

/**
 * Application entry point: set log levels, bring up the peripheral set,
 * SD card, board keys and codec, then run the record/playback loop on the
 * calling task.
 */
void app_main(void)
{
    /* Limit every tag to warnings, but keep INFO output for this example's tag. */
    esp_log_level_set("*", ESP_LOG_WARN);
    esp_log_level_set(TAG, ESP_LOG_INFO);

    /* Peripheral set, stored in the file-scope `set` that record_playback_task() reads. */
    esp_periph_config_t cfg = DEFAULT_ESP_PERIPH_SET_CONFIG();
    set = esp_periph_set_init(&cfg);

    /* SD card in 1-line mode, then the board keys, both attached to the set. */
    audio_board_sdcard_init(set, SD_MODE_1_LINE);
    audio_board_key_init(set);

    /* Initialise the board and start its codec in BOTH mode. */
    audio_board_handle_t board = audio_board_init();
    audio_hal_ctrl_codec(board->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);

    /* Run the record/playback loop, then destroy the peripheral set. */
    record_playback_task();
    esp_periph_set_destroy(set);
}

Debug Logs

I (27) boot: ESP-IDF v5.4.1-dirty 2nd stage bootloader
I (27) boot: compile time Jun 16 2025 16:14:48
I (27) boot: Multicore bootloader
I (28) boot: chip revision: v0.2
I (30) boot: efuse block revision: v1.3
I (34) boot.esp32s3: Boot SPI Speed : 80MHz
I (38) boot.esp32s3: SPI Mode       : DIO
I (42) boot.esp32s3: SPI Flash Size : 16MB
I (45) boot: Enabling RNG early entropy source...
I (50) boot: Partition Table:
I (53) boot: ## Label            Usage          Type ST Offset   Length
I (59) boot:  0 nvs              WiFi data        01 02 00009000 00006000
I (65) boot:  1 phy_init         RF data          01 01 0000f000 00001000
I (72) boot:  2 factory          factory app      00 00 00010000 00100000
I (78) boot: End of partition table
I (82) esp_image: segment 0: paddr=00010020 vaddr=3c050020 size=17f10h ( 98064) map
I (106) esp_image: segment 1: paddr=00027f38 vaddr=3fc96600 size=02e24h ( 11812) load
I (109) esp_image: segment 2: paddr=0002ad64 vaddr=40374000 size=052b4h ( 21172) load
I (115) esp_image: segment 3: paddr=00030020 vaddr=42000020 size=4459ch (279964) map
I (167) esp_image: segment 4: paddr=000745c4 vaddr=403792b4 size=0d308h ( 54024) load
I (179) esp_image: segment 5: paddr=000818d4 vaddr=600fe100 size=0001ch (    28) load
I (186) boot: Loaded app from partition at offset 0x10000
I (187) boot: Disabling RNG early entropy source...
I (197) cpu_start: Multicore app
I (206) cpu_start: Pro cpu start user code
I (206) cpu_start: cpu freq: 160000000 Hz
I (206) app_init: Application information:
I (206) app_init: Project name:     demo_pipeline_sonic
I (211) app_init: App version:      1
I (214) app_init: Compile time:     Jun 16 2025 17:59:28
I (219) app_init: ELF file SHA256:  cdadd966f...
I (224) app_init: ESP-IDF:          v5.4.1-dirty
I (228) efuse_init: Min chip rev:     v0.0
I (232) efuse_init: Max chip rev:     v0.99
I (236) efuse_init: Chip rev:         v0.2
I (240) heap_init: Initializing. RAM available for dynamic allocation:
I (246) heap_init: At 3FC9A128 len 0004F5E8 (317 KiB): RAM
I (251) heap_init: At 3FCE9710 len 00005724 (21 KiB): RAM
I (256) heap_init: At 3FCF0000 len 00008000 (32 KiB): DRAM
I (262) heap_init: At 600FE11C len 00001ECC (7 KiB): RTCRAM
I (268) spi_flash: detected chip: gd
I (270) spi_flash: flash io: dio
W (273) ADC: legacy driver is deprecated, please migrate to `esp_adc/adc_oneshot.h`
I (281) sleep_gpio: Configure to isolate all GPIO pins in sleep state
I (287) sleep_gpio: Enable automatic switching of GPIO sleep configuration
I (294) main_task: Started on CPU0
I (324) main_task: Calling app_main()
W (824) i2c_bus_v2: I2C master handle is NULL, will create new one
W (864) ES7210: Enable TDM mode. ES7210_SDP_INTERFACE2_REG12: 2
I (874) SONIC_EXAMPLE: [1.1] Initialize recorder pipeline
I (874) SONIC_EXAMPLE: [1.2] Create audio elements for recorder pipeline
I (884) SONIC_EXAMPLE: [1.3] Register audio elements to recorder pipeline
I (884) SONIC_EXAMPLE: [2.2] Create audio elements for playback pipeline
I (894) SONIC_EXAMPLE: [2.3] Register audio elements to playback pipeline
I (894) SONIC_EXAMPLE: [ 3 ] Set up  event listener
W (904) SONIC_EXAMPLE: Press [Rec] to start recording
E (6434) SONIC_EXAMPLE: Now recording, release [Rec] to STOP
W (6434) AUDIO_PIPELINE: Without stop, st:1
W (6434) AUDIO_PIPELINE: Without wait stop, st:1
W (6434) AUDIO_ELEMENT: [file_reader] Element has not create when AUDIO_ELEMENT_TERMINATE
W (6444) AUDIO_ELEMENT: [wav_decoder] Element has not create when AUDIO_ELEMENT_TERMINATE
W (6444) AUDIO_ELEMENT: [i2s_writer] Element has not create when AUDIO_ELEMENT_TERMINATE
I (6454) SONIC_EXAMPLE: Setup file path to save recorded audio
W (6464) AUDIO_THREAD: Make sure selected the `CONFIG_SPIRAM_BOOT_INIT` and `CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY` by `make menuconfig`
I (9134) SONIC_EXAMPLE: START Playback
W (9134) AUDIO_ELEMENT: IN-[wav_encoder] AEL_IO_ABORT
W (9134) AUDIO_ELEMENT: [wav_encoder] Element already stopped
I (9144) SONIC_EXAMPLE: Setup file path to read the wav audio to play
W (9144) AUDIO_THREAD: Make sure selected the `CONFIG_SPIRAM_BOOT_INIT` and `CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY` by `make menuconfig`

Other Items If Possible

[ ] sdkconfig file (Attach the sdkconfig file from your project folder)
[ ] elf file in the build folder (Note this may contain all the code details and symbols of your project.)
[ ] coredump (This provides stacks of tasks.)

Jun 16 '25 10:06 Crazy-lxy

在新版本的 i2s_stream_idf5 中，MONO 模式下没有指定使用的声道。可以在 create_i2s_stream 函数中、调用 i2s_stream_init 之前加入如下代码：

i2s_stream_set_channel_type(&i2s_cfg, I2S_CHANNEL_TYPE_ONLY_LEFT);

即可正常录音

Jun 27 '25 02:06 520lbl

实验了一下，问题解决了，谢谢 (^🙏^)

Jun 27 '25 03:06 Crazy-lxy