ESP32-C3 Keyword Spotting (“Yes/No”) with TFLite Micro micro_speech using the Onboard Mic (ADC)
This post documents how to run the TensorFlow Lite for Microcontrollers (TFLM, now branded as LiteRT for Microcontrollers) keyword spotting example (micro_speech) on an ESP32-C3 board, and how to adapt the example to use the onboard analog microphone routed through the ESP32-C3 ADC. To set up the ESP32-C3-Lyra V2.0, see this post:
Hello World on ESP-32-C3-Lyra V2.0
Environment / Versions
- Target board: ESP32-C3 (ESP32-C3-Lyra)
- ESP-IDF version: v6.x (or v6.0-dev)
- Example project:
esp-tflite-micro:micro_speech (keyword spotting “yes/no”)
- Note: ESP-IDF v6 removed the legacy ADC driver (driver/adc.h) and renamed some ADC attenuation enums.
The ADC code below reflects those v6+ changes.
Command Log
1) Load the ESP-IDF environment
. $HOME/esp/esp-idf/export.sh
2) Create a new project from the micro_speech example
cd ~
idf.py create-project-from-example "espressif/esp-tflite-micro=1.3.0:micro_speech"
mv ~/micro_speech ~/keyword_spotting_tflm
3) Set the target to ESP32-C3
cd ~/keyword_spotting_tflm && idf.py set-target esp32c3
4) Install/verify ESP-IDF tools for ESP32-C3 (v6+ toolchain)
python3 $IDF_PATH/tools/idf_tools.py install --targets esp32c3
. $HOME/esp/esp-idf/export.sh
5) Build
cd ~/keyword_spotting_tflm && idf.py build
6) Flash and open the serial monitor
cd ~/keyword_spotting_tflm && idf.py -p /dev/ttyUSB0 flash monitor
To exit the serial monitor, press Ctrl + ]
Troubleshooting
Issue: Default example tried I2S and failed
The upstream micro_speech project’s audio capture path attempted to configure I2S pins (I2S microphone).
On this setup there is no I2S microphone wired up (the onboard mic is read through the ADC instead), so the I2S pin configuration fails with the following error:
E (...) i2s_set_pin(...): bck_io_num invalid
E (...) TF_LITE_AUDIO_PROVIDER: Error in i2s_set_pin
Fix: Switch audio capture from I2S to ADC continuous sampling
Key points of the ADC implementation:
- Use esp_adc/adc_continuous.h continuous mode to sample at 16 kHz.
- Convert 12-bit unsigned ADC samples into signed 16-bit PCM-like samples centered around mid-scale.
- Write samples into the existing ring buffer so the model’s GetAudioSamples() continues to work.
Switch audio_provider.cc to the working ADC version (ESP32-C3, TYPE2 output format) by replacing the entire contents of:
~/keyword_spotting_tflm/main/audio_provider.cc
with the following code:
/* ADC-based audio provider for ESP32-C3-Lyra (MIC_ADC on IO0 / ADC1 CH0) */
#include "audio_provider.h"
#include <cstring>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_log.h"
#include "esp_adc/adc_continuous.h"
#include "ringbuf.h"
#include "micro_model_settings.h"
// Tag passed to the ESP_LOGx macros throughout this file.
static const char* TAG = "TF_LITE_AUDIO_PROVIDER";
// Ring buffer between the capture task (writer) and the GetAudioSamples*()
// functions (reader). Created in InitAudioRecording().
ringbuf_t* g_audio_capture_buffer;
// Running audio clock in milliseconds. Advanced by CaptureSamples() each time
// samples are written to the ring buffer; returned by LatestAudioTimestamp().
volatile int32_t g_latest_audio_timestamp = 0;
// Number of samples from the previous window that are carried over into the
// next one: (feature duration - feature stride), converted from ms to samples.
constexpr int32_t history_samples_to_keep =
((kFeatureDurationMs - kFeatureStrideMs) * (kAudioSampleFrequency / 1000));
// Number of fresh samples pulled from the ring buffer on each
// GetAudioSamples() call: one feature stride, converted from ms to samples.
constexpr int32_t new_samples_to_get =
(kFeatureStrideMs * (kAudioSampleFrequency / 1000));
// Size handed to rb_init() for the capture ring buffer
// (presumably in bytes -- confirm against ringbuf.h).
const int32_t kAudioCaptureBufferSize = 40000;
namespace {

// ---- Sample buffers used by GetAudioSamples*() ----

// Window handed back to callers through the audio_samples out-parameter.
int16_t g_audio_output_buffer[kMaxAudioSampleSize * 32];
// Tail of the previous window; copied to the front of the next window.
int16_t g_history_buffer[history_samples_to_keep];
// Set to true once InitAudioRecording() has returned kTfLiteOk.
bool g_is_audio_initialized = false;

// ---- ADC continuous-mode state ----

// Driver handle; filled in by adc_init_continuous().
adc_continuous_handle_t g_adc_handle = nullptr;

// Number of bytes requested from the ADC driver on every read.
constexpr size_t kAdcReadBytes = 1024;
// Raw conversion frames exactly as delivered by adc_continuous_read().
uint8_t g_adc_read_buf[kAdcReadBytes];
// Scratch PCM buffer: one int16 sample per raw frame in g_adc_read_buf.
int16_t g_pcm_buf[kAdcReadBytes / sizeof(adc_digi_output_data_t)];

// ---- ADC channel selection ----
// ESP32-C3-Lyra MIC_ADC is routed to IO0 => ADC1 channel 0.
constexpr adc_unit_t kAdcUnit = ADC_UNIT_1;
constexpr adc_channel_t kAdcChannel = ADC_CHANNEL_0;
constexpr adc_atten_t kAdcAtten = ADC_ATTEN_DB_12;
constexpr adc_bitwidth_t kAdcBitwidth = ADC_BITWIDTH_12;

}  // namespace
static void adc_init_continuous() {
adc_continuous_handle_cfg_t handle_cfg = {
.max_store_buf_size = 4096,
.conv_frame_size = 1024,
};
ESP_ERROR_CHECK(adc_continuous_new_handle(&handle_cfg, &g_adc_handle));
adc_digi_pattern_config_t pattern = {};
pattern.atten = kAdcAtten;
pattern.channel = kAdcChannel;
pattern.unit = kAdcUnit;
pattern.bit_width = kAdcBitwidth;
adc_continuous_config_t dig_cfg = {};
dig_cfg.sample_freq_hz = kAudioSampleFrequency; // 16 kHz
dig_cfg.conv_mode = ADC_CONV_SINGLE_UNIT_1;
// ESP32-C3 DMA output uses TYPE2 layout
dig_cfg.format = ADC_DIGI_OUTPUT_FORMAT_TYPE2;
dig_cfg.pattern_num = 1;
dig_cfg.adc_pattern = &pattern;
ESP_ERROR_CHECK(adc_continuous_config(g_adc_handle, &dig_cfg));
ESP_ERROR_CHECK(adc_continuous_start(g_adc_handle));
}
// Converts one unsigned 12-bit ADC reading to a signed 16-bit PCM-style
// sample.
//
// The reading is re-centred so that mid-scale (2048) maps to 0, then scaled
// by 16 so the 12-bit span covers roughly the int16 range
// (0 -> -32768, 4095 -> 32752). Inputs outside the 12-bit range are clamped
// to [-32768, 32767].
static inline int16_t adc12_to_pcm16(uint16_t adc12) {
  const int32_t centered = (int32_t)adc12 - 2048;
  // Multiply instead of `<< 4`: `centered` is negative for every reading
  // below mid-scale, and left-shifting a negative value is undefined
  // behaviour in C and in C++ before C++20.
  int32_t pcm = centered * 16;
  if (pcm > 32767) {
    pcm = 32767;
  }
  if (pcm < -32768) {
    pcm = -32768;
  }
  return (int16_t)pcm;
}
// FreeRTOS task body that feeds the audio ring buffer.
//
// Starts the ADC in continuous mode, then loops forever: reads a block of raw
// conversion frames, converts each frame to a signed 16-bit sample, writes
// the samples to g_audio_capture_buffer, and advances
// g_latest_audio_timestamp by the duration of the samples actually written.
// Never returns.
//
// arg: unused task parameter.
static void CaptureSamples(void* arg) {
  (void)arg;
  adc_init_continuous();

  // Remainder of (samples * 1000) / sample_rate carried between writes, so
  // that writes which are not a whole number of milliseconds do not make the
  // timestamp fall behind the audio actually captured.
  int32_t ms_numerator_carry = 0;

  while (true) {
    uint32_t out_bytes = 0;
    // adc_continuous_read() takes its timeout in milliseconds (not RTOS
    // ticks), so pass 200 directly rather than pdMS_TO_TICKS(200).
    esp_err_t ret = adc_continuous_read(
        g_adc_handle, g_adc_read_buf, kAdcReadBytes, &out_bytes, 200);

    if (ret == ESP_OK && out_bytes > 0) {
      const size_t n_frames = out_bytes / sizeof(adc_digi_output_data_t);
      for (size_t i = 0; i < n_frames; i++) {
        const adc_digi_output_data_t* p =
            (const adc_digi_output_data_t*)(g_adc_read_buf +
                                            i * sizeof(adc_digi_output_data_t));
        // ESP32-C3 uses type2 layout (type1 will not compile)
        uint16_t raw = (uint16_t)(p->type2.data);
        g_pcm_buf[i] = adc12_to_pcm16(raw);
      }

      const int bytes_to_write = (int)(n_frames * sizeof(int16_t));
      const int bytes_written = rb_write(g_audio_capture_buffer,
                                         (uint8_t*)g_pcm_buf,
                                         bytes_to_write,
                                         pdMS_TO_TICKS(200));
      if (bytes_written > 0) {
        const int samples_written = bytes_written / (int)sizeof(int16_t);
        const int32_t ms_numerator =
            1000 * samples_written + ms_numerator_carry;
        // Explicit read-modify-write on the volatile timestamp.
        g_latest_audio_timestamp =
            g_latest_audio_timestamp + ms_numerator / kAudioSampleFrequency;
        ms_numerator_carry = ms_numerator % kAudioSampleFrequency;
      }
    }

    // A timeout just means no data yet; anything else is logged and retried
    // after a short back-off.
    if (ret != ESP_OK && ret != ESP_ERR_TIMEOUT) {
      ESP_LOGE(TAG, "adc_continuous_read failed: %s", esp_err_to_name(ret));
      vTaskDelay(pdMS_TO_TICKS(50));
    }
  }
}
// Creates the capture ring buffer, starts the CaptureSamples task, and blocks
// until that task has written its first samples (i.e. until
// g_latest_audio_timestamp becomes non-zero).
//
// Returns kTfLiteOk once audio is flowing, or kTfLiteError if the ring buffer
// or the capture task could not be created.
TfLiteStatus InitAudioRecording() {
  g_audio_capture_buffer = rb_init("tf_ringbuffer", kAudioCaptureBufferSize);
  if (!g_audio_capture_buffer) {
    ESP_LOGE(TAG, "Error creating ring buffer");
    return kTfLiteError;
  }

  // Without this check a failed task creation would leave the wait loop
  // below spinning forever, because nothing would ever advance the timestamp.
  if (xTaskCreate(CaptureSamples, "CaptureSamples", 1024 * 4, NULL, 10,
                  NULL) != pdPASS) {
    // NOTE(review): the ring buffer allocated above is not released on this
    // path; free it here if ringbuf.h provides a cleanup routine.
    ESP_LOGE(TAG, "Error creating CaptureSamples task");
    return kTfLiteError;
  }

  // Yield one tick at a time until the capture task reports its first audio.
  while (!g_latest_audio_timestamp) {
    vTaskDelay(1);
  }
  ESP_LOGI(TAG, "Audio Recording started (ADC continuous)");
  return kTfLiteOk;
}
// Reads up to 16000 bytes of captured audio from the ring buffer into the
// shared output buffer, starting audio capture on first use.
//
// audio_samples_size: receives the value returned by rb_read(), or 0 when
//   rb_read() reports an error.
//   NOTE(review): this looks like a byte count rather than a sample count --
//   confirm what callers expect.
// audio_samples: receives a pointer to the shared output buffer.
//
// Returns kTfLiteOk, or the error from InitAudioRecording() if capture could
// not be started.
TfLiteStatus GetAudioSamples1(int* audio_samples_size, int16_t** audio_samples) {
  if (!g_is_audio_initialized) {
    const TfLiteStatus started = InitAudioRecording();
    if (started != kTfLiteOk) {
      return started;
    }
    g_is_audio_initialized = true;
  }

  // NOTE(review): the timeout is passed as a raw 1000 here, whereas other
  // ring-buffer calls in this file use pdMS_TO_TICKS() -- confirm the unit.
  uint8_t* const dest = (uint8_t*)(g_audio_output_buffer);
  const int rb_result = rb_read(g_audio_capture_buffer, dest, 16000, 1000);

  const bool read_failed = (rb_result < 0);
  if (read_failed) {
    ESP_LOGI(TAG, "Couldn't read data in time");
  }

  *audio_samples_size = read_failed ? 0 : rb_result;
  *audio_samples = g_audio_output_buffer;
  return kTfLiteOk;
}
// Produces the next audio window for feature extraction, starting audio
// capture on first use.
//
// The window is assembled in the shared output buffer as
// [history_samples_to_keep saved samples][new_samples_to_get fresh samples];
// the last history_samples_to_keep samples of the window are then saved for
// the next call.
//
// start_ms, duration_ms: not used by this implementation; each call simply
//   consumes the next new_samples_to_get samples from the ring buffer.
// audio_samples_size: always set to kMaxAudioSampleSize.
// audio_samples: receives a pointer to the shared output buffer.
//
// Returns kTfLiteOk, or the error from InitAudioRecording() if capture could
// not be started. A failed or short ring-buffer read is logged (on failure)
// and the missing samples are delivered as silence.
TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
                             int* audio_samples_size, int16_t** audio_samples) {
  if (!g_is_audio_initialized) {
    TfLiteStatus init_status = InitAudioRecording();
    if (init_status != kTfLiteOk) {
      return init_status;
    }
    g_is_audio_initialized = true;
  }

  // Front of the window: samples carried over from the previous call.
  memcpy((void*)(g_audio_output_buffer), (void*)(g_history_buffer),
         history_samples_to_keep * sizeof(int16_t));

  // Rest of the window: fresh samples from the capture task.
  uint8_t* const fresh = (uint8_t*)(g_audio_output_buffer + history_samples_to_keep);
  const int fresh_bytes = (int)(new_samples_to_get * sizeof(int16_t));
  int bytes_read =
      rb_read(g_audio_capture_buffer, fresh, fresh_bytes, pdMS_TO_TICKS(200));
  if (bytes_read < 0) {
    ESP_LOGE(TAG, "Model could not read data from Ring Buffer");
    bytes_read = 0;
  }
  // Whatever was not filled still holds the previous call's samples; zero it
  // so stale audio is not replayed to the model or copied into the history.
  if (bytes_read < fresh_bytes) {
    memset(fresh + bytes_read, 0, (size_t)(fresh_bytes - bytes_read));
  }

  // Save the tail of this window as the history for the next call.
  memcpy((void*)(g_history_buffer),
         (void*)(g_audio_output_buffer + new_samples_to_get),
         history_samples_to_keep * sizeof(int16_t));

  *audio_samples_size = kMaxAudioSampleSize;
  *audio_samples = g_audio_output_buffer;
  return kTfLiteOk;
}
// Returns the current value of the capture clock, g_latest_audio_timestamp,
// which CaptureSamples() advances as samples are written to the ring buffer.
int32_t LatestAudioTimestamp() {
  const int32_t now_ms = g_latest_audio_timestamp;
  return now_ms;
}
Issue: Missing header esp_adc/adc_continuous.h
After adding the include, the build failed with:
fatal error: esp_adc/adc_continuous.h: No such file or directory
Fix: Add the esp_adc component dependency
Edit main/CMakeLists.txt to add esp_adc to PRIV_REQUIRES (or REQUIRES):
nano ~/keyword_spotting_tflm/main/CMakeLists.txt
idf_component_register(
SRCS ...
INCLUDE_DIRS .
PRIV_REQUIRES esp_adc
)
Issue: adc_digi_output_data_t had no type1 on ESP32-C3
Build error:
error: 'const struct adc_digi_output_data_t' has no member named 'type1'
Fix: Use the ESP32-C3 struct layout (TYPE2)
Make the following changes in the file keyword_spotting_tflm/main/audio_provider.cc:
- ADC_DIGI_OUTPUT_FORMAT_TYPE1 → ADC_DIGI_OUTPUT_FORMAT_TYPE2
- p->type1.data → p->type2.data
Next, rebuild and reflash:
cd ~/keyword_spotting_tflm && idf.py build
cd ~/keyword_spotting_tflm && idf.py -p /dev/ttyUSB0 flash monitor
Issue: Toolchain version mismatch on ESP-IDF v6+
If the build fails with a toolchain mismatch (e.g., expected esp-15.2.0_20250929), install the ESP32-C3 toolchain:
python3 $IDF_PATH/tools/idf_tools.py install --targets esp32c3
. $HOME/esp/esp-idf/export.sh
Issue: idf.py fullclean refuses to delete the build directory
If idf.py fullclean refuses to delete the build directory, delete it manually:
cd ~/keyword_spotting_tflm
rm -rf build
idf.py build
After switching the audio provider to ADC and aligning the ADC DMA output format for ESP32-C3, the application ran successfully and recognized the keywords “yes” and “no” over serial output. The next post will include customization for keyword spotting with additional words.



















