Add wake word to xmini-c3 (#730)
* esp-hi: MCP protocol is not ready yet * Add wake word to xmini-c3
This commit is contained in:
parent
6cb025859f
commit
ae57131c15
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,5 +10,6 @@ dependencies.lock
|
||||
.env
|
||||
releases/
|
||||
main/assets/lang_config.h
|
||||
main/mmap_generate_emoji.h
|
||||
.DS_Store
|
||||
.cache
|
||||
@ -194,13 +194,14 @@ list(APPEND SOURCES ${BOARD_SOURCES})
|
||||
if(CONFIG_USE_AUDIO_PROCESSOR)
|
||||
list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
|
||||
else()
|
||||
list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc")
|
||||
list(APPEND SOURCES "audio_processing/no_audio_processor.cc")
|
||||
endif()
|
||||
if(CONFIG_USE_WAKE_WORD_DETECT)
|
||||
list(APPEND SOURCES "audio_processing/wake_word_detect.cc")
|
||||
endif()
|
||||
if(CONFIG_USE_WAKE_WORD_DETECT_NO_AFE)
|
||||
list(APPEND SOURCES "audio_processing/wake_word_no_afe.cc")
|
||||
if(CONFIG_USE_AFE_WAKE_WORD)
|
||||
list(APPEND SOURCES "audio_processing/afe_wake_word.cc")
|
||||
elseif(CONFIG_USE_ESP_WAKE_WORD)
|
||||
list(APPEND SOURCES "audio_processing/esp_wake_word.cc")
|
||||
else()
|
||||
list(APPEND SOURCES "audio_processing/no_wake_word.cc")
|
||||
endif()
|
||||
|
||||
# 根据Kconfig选择语言目录
|
||||
|
||||
@ -30,152 +30,226 @@ choice BOARD_TYPE
|
||||
Board type. 开发板类型
|
||||
config BOARD_TYPE_BREAD_COMPACT_WIFI
|
||||
bool "面包板新版接线(WiFi)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD
|
||||
bool "面包板新版接线(WiFi)+ LCD"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_BREAD_COMPACT_ML307
|
||||
bool "面包板新版接线(ML307 AT)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_BREAD_COMPACT_ESP32
|
||||
bool "面包板(WiFi) ESP32 DevKit"
|
||||
depends on IDF_TARGET_ESP32
|
||||
config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD
|
||||
bool "面包板(WiFi+ LCD) ESP32 DevKit"
|
||||
depends on IDF_TARGET_ESP32
|
||||
config BOARD_TYPE_XMINI_C3
|
||||
bool "虾哥 Mini C3"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_ESP32S3_KORVO2_V3
|
||||
bool "ESP32S3_KORVO2_V3开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_SPARKBOT
|
||||
bool "ESP-SparkBot开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_SPOT_S3
|
||||
bool "ESP-Spot-S3"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_HI
|
||||
bool "ESP-HI"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_ESP_BOX_3
|
||||
bool "ESP BOX 3"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_BOX
|
||||
bool "ESP BOX"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_BOX_LITE
|
||||
bool "ESP BOX Lite"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_KEVIN_BOX_1
|
||||
bool "Kevin Box 1"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_KEVIN_BOX_2
|
||||
bool "Kevin Box 2"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_KEVIN_C3
|
||||
bool "Kevin C3"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_KEVIN_SP_V3_DEV
|
||||
bool "Kevin SP V3开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_KEVIN_SP_V4_DEV
|
||||
bool "Kevin SP V4开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32_CGC
|
||||
bool "ESP32 CGC"
|
||||
depends on IDF_TARGET_ESP32
|
||||
config BOARD_TYPE_KEVIN_YUYING_313LCD
|
||||
bool "鱼鹰科技3.13LCD开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LICHUANG_DEV
|
||||
bool "立创·实战派ESP32-S3开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LICHUANG_C3_DEV
|
||||
bool "立创·实战派ESP32-C3开发板"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_DF_K10
|
||||
bool "DFRobot 行空板 k10"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_DF_S3_AI_CAM
|
||||
bool "DFRobot ESP32-S3 AI智能摄像头模块"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MAGICLICK_2P4
|
||||
bool "神奇按钮 Magiclick_2.4"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MAGICLICK_2P5
|
||||
bool "神奇按钮 Magiclick_2.5"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MAGICLICK_C3
|
||||
bool "神奇按钮 Magiclick_C3"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_MAGICLICK_C3_V2
|
||||
bool "神奇按钮 Magiclick_C3_v2"
|
||||
depends on IDF_TARGET_ESP32C3
|
||||
config BOARD_TYPE_M5STACK_CORE_S3
|
||||
bool "M5Stack CoreS3"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_M5STACK_CORE_TAB5
|
||||
bool "M5Stack Tab5"
|
||||
depends on IDF_TARGET_ESP32P4
|
||||
config BOARD_TYPE_ATOMS3_ECHO_BASE
|
||||
bool "AtomS3 + Echo Base"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATOMS3R_ECHO_BASE
|
||||
bool "AtomS3R + Echo Base"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATOMS3R_CAM_M12_ECHO_BASE
|
||||
bool "AtomS3R CAM/M12 + Echo Base"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATOMMATRIX_ECHO_BASE
|
||||
bool "AtomMatrix + Echo Base"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_8
|
||||
bool "Waveshare ESP32-S3-Touch-AMOLED-1.8"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_75
|
||||
bool "Waveshare ESP32-S3-Touch-AMOLED-1.75"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85C
|
||||
bool "Waveshare ESP32-S3-Touch-LCD-1.85C"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85
|
||||
bool "Waveshare ESP32-S3-Touch-LCD-1.85"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_46
|
||||
bool "Waveshare ESP32-S3-Touch-LCD-1.46"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Touch_LCD_3_5
|
||||
bool "Waveshare ESP32-S3-Touch-LCD-3.5"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32P4_NANO
|
||||
bool "Waveshare ESP32-P4-NANO"
|
||||
depends on IDF_TARGET_ESP32P4
|
||||
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_4B
|
||||
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4B"
|
||||
depends on IDF_TARGET_ESP32P4
|
||||
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC
|
||||
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-3.4C or ESP32-P4-WIFI6-Touch-LCD-4C"
|
||||
depends on IDF_TARGET_ESP32P4
|
||||
config BOARD_TYPE_TUDOUZI
|
||||
bool "土豆子"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LILYGO_T_CIRCLE_S3
|
||||
bool "LILYGO T-Circle-S3"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_0_V1_1
|
||||
bool "LILYGO T-CameraPlus-S3_V1_0_V1_1"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_2
|
||||
bool "LILYGO T-CameraPlus-S3_V1_2"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA
|
||||
bool "LILYGO T-Display-S3-Pro-MVSRLora"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA_NO_BATTERY
|
||||
bool "LILYGO T-Display-S3-Pro-MVSRLora_No_Battery"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MOVECALL_MOJI_ESP32S3
|
||||
bool "Movecall Moji 小智AI衍生版"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MOVECALL_CUICAN_ESP32S3
|
||||
bool "Movecall CuiCan 璀璨·AI吊坠"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATK_DNESP32S3
|
||||
bool "正点原子DNESP32S3开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATK_DNESP32S3_BOX
|
||||
bool "正点原子DNESP32S3-BOX"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATK_DNESP32S3_BOX0
|
||||
bool "正点原子DNESP32S3-BOX0"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATK_DNESP32S3M_WIFI
|
||||
bool "正点原子DNESP32S3M-WIFI"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ATK_DNESP32S3M_4G
|
||||
bool "正点原子DNESP32S3M-4G"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_DU_CHATX
|
||||
bool "嘟嘟开发板CHATX(wifi)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32S3_Taiji_Pi
|
||||
bool "太极小派esp32s3"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_WIFI
|
||||
bool "无名科技星智0.85(WIFI)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_ML307
|
||||
bool "无名科技星智0.85(ML307)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_WIFI
|
||||
bool "无名科技星智0.96(WIFI)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_ML307
|
||||
bool "无名科技星智0.96(ML307)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_WIFI
|
||||
bool "无名科技星智1.54(WIFI)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_ML307
|
||||
bool "无名科技星智1.54(ML307)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_SENSECAP_WATCHER
|
||||
bool "SenseCAP Watcher"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_DOIT_S3_AIBOX
|
||||
bool "四博智联AI陪伴盒子"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MIXGO_NOVA
|
||||
bool "元控·青春"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_GENJUTECH_S3_1_54TFT
|
||||
bool "亘具科技1.54(s3)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP_S3_LCD_EV_Board
|
||||
bool "乐鑫ESP S3 LCD EV Board开发板"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ZHENGCHEN_1_54TFT_WIFI
|
||||
bool "征辰科技1.54(WIFI)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ZHENGCHEN_1_54TFT_ML307
|
||||
bool "征辰科技1.54(ML307)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_MINSI_K08_DUAL
|
||||
bool "敏思科技K08(DUAL)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32_S3_1_54_MUMA
|
||||
bool "Spotpear ESP32-S3-1.54-MUMA"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
config BOARD_TYPE_ESP32_S3_1_28_BOX
|
||||
bool "Spotpear ESP32-S3-1.28-BOX"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
endchoice
|
||||
|
||||
choice ESP_S3_LCD_EV_Board_Version_TYPE
|
||||
@ -270,24 +344,26 @@ config USE_WECHAT_MESSAGE_STYLE
|
||||
help
|
||||
使用微信聊天界面风格
|
||||
|
||||
config USE_WAKE_WORD_DETECT_NO_AFE
|
||||
config USE_ESP_WAKE_WORD
|
||||
bool "Enable Wake Word Detection (without AFE)"
|
||||
default y
|
||||
depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5
|
||||
|
||||
config USE_WAKE_WORD_DETECT
|
||||
bool "Enable Wake Word Detection"
|
||||
default y
|
||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
|
||||
help
|
||||
需要 ESP32 S3 与 AFE 支持
|
||||
支持 ESP32 C3 与 ESP32 C5
|
||||
|
||||
config USE_AFE_WAKE_WORD
|
||||
bool "Enable Wake Word Detection (AFE)"
|
||||
default n
|
||||
depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
|
||||
help
|
||||
需要 ESP32 S3 与 PSRAM 支持
|
||||
|
||||
config USE_AUDIO_PROCESSOR
|
||||
bool "Enable Audio Noise Reduction"
|
||||
default y
|
||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
|
||||
depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
|
||||
help
|
||||
需要 ESP32 S3 与 AFE 支持
|
||||
需要 ESP32 S3 与 PSRAM 支持
|
||||
|
||||
config USE_DEVICE_AEC
|
||||
bool "Enable Device-Side AEC"
|
||||
@ -297,7 +373,7 @@ config USE_DEVICE_AEC
|
||||
因为性能不够,不建议和微信聊天界面风格同时开启
|
||||
|
||||
config USE_SERVER_AEC
|
||||
bool "Enable Server-Side AEC"
|
||||
bool "Enable Server-Side AEC (Unstable)"
|
||||
default n
|
||||
depends on USE_AUDIO_PROCESSOR
|
||||
help
|
||||
|
||||
@ -14,7 +14,15 @@
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
#include "afe_audio_processor.h"
|
||||
#else
|
||||
#include "dummy_audio_processor.h"
|
||||
#include "no_audio_processor.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
#include "afe_wake_word.h"
|
||||
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||
#include "esp_wake_word.h"
|
||||
#else
|
||||
#include "no_wake_word.h"
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
@ -55,7 +63,15 @@ Application::Application() {
|
||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||
audio_processor_ = std::make_unique<AfeAudioProcessor>();
|
||||
#else
|
||||
audio_processor_ = std::make_unique<DummyAudioProcessor>();
|
||||
audio_processor_ = std::make_unique<NoAudioProcessor>();
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
wake_word_ = std::make_unique<AfeWakeWord>();
|
||||
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||
wake_word_ = std::make_unique<EspWakeWord>();
|
||||
#else
|
||||
wake_word_ = std::make_unique<NoWakeWord>();
|
||||
#endif
|
||||
|
||||
esp_timer_create_args_t clock_timer_args = {
|
||||
@ -129,9 +145,7 @@ void Application::CheckNewVersion() {
|
||||
|
||||
auto& board = Board::GetInstance();
|
||||
board.SetPowerSaveMode(false);
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
wake_word_detect_.StopDetection();
|
||||
#endif
|
||||
wake_word_->StopDetection();
|
||||
// 预先关闭音频输出,避免升级过程有音频操作
|
||||
auto codec = board.GetAudioCodec();
|
||||
codec->EnableInput(false);
|
||||
@ -256,8 +270,6 @@ void Application::PlaySound(const std::string_view& sound) {
|
||||
}
|
||||
background_task_->WaitForCompletion();
|
||||
|
||||
// The assets are encoded at 16000Hz, 60ms frame duration
|
||||
SetDecodeSampleRate(16000, 60);
|
||||
const char* data = sound.data();
|
||||
size_t size = sound.size();
|
||||
for (const char* p = data; p < data + size; ) {
|
||||
@ -266,6 +278,8 @@ void Application::PlaySound(const std::string_view& sound) {
|
||||
|
||||
auto payload_size = ntohs(p3->payload_size);
|
||||
AudioStreamPacket packet;
|
||||
packet.sample_rate = 16000;
|
||||
packet.frame_duration = 60;
|
||||
packet.payload.resize(payload_size);
|
||||
memcpy(packet.payload.data(), p3->payload, payload_size);
|
||||
p += payload_size;
|
||||
@ -432,7 +446,7 @@ void Application::Start() {
|
||||
});
|
||||
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||
if (device_state_ == kDeviceStateSpeaking && audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||
audio_decode_queue_.emplace_back(std::move(packet));
|
||||
}
|
||||
});
|
||||
@ -442,7 +456,6 @@ void Application::Start() {
|
||||
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
|
||||
protocol_->server_sample_rate(), codec->output_sample_rate());
|
||||
}
|
||||
SetDecodeSampleRate(protocol_->server_sample_rate(), protocol_->server_frame_duration());
|
||||
|
||||
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
||||
auto& thing_manager = iot::ThingManager::GetInstance();
|
||||
@ -600,28 +613,40 @@ void Application::Start() {
|
||||
}
|
||||
});
|
||||
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
wake_word_detect_.Initialize(codec);
|
||||
#ifdef CONFIG_USE_WAKE_WORD_DETECT
|
||||
wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) {
|
||||
wake_word_->Initialize(codec);
|
||||
wake_word_->OnWakeWordDetected([this](const std::string& wake_word) {
|
||||
Schedule([this, &wake_word]() {
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
SetDeviceState(kDeviceStateConnecting);
|
||||
wake_word_detect_.EncodeWakeWordData();
|
||||
if (!protocol_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!protocol_ || !protocol_->OpenAudioChannel()) {
|
||||
wake_word_detect_.StartDetection();
|
||||
return;
|
||||
if (device_state_ == kDeviceStateIdle) {
|
||||
wake_word_->EncodeWakeWordData();
|
||||
|
||||
if (!protocol_->IsAudioChannelOpened()) {
|
||||
SetDeviceState(kDeviceStateConnecting);
|
||||
if (!protocol_->OpenAudioChannel()) {
|
||||
wake_word_->StartDetection();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
AudioStreamPacket packet;
|
||||
// Encode and send the wake word data to the server
|
||||
while (wake_word_detect_.GetWakeWordOpus(packet.payload)) {
|
||||
while (wake_word_->GetWakeWordOpus(packet.payload)) {
|
||||
protocol_->SendAudio(packet);
|
||||
}
|
||||
// Set the chat state to wake word detected
|
||||
protocol_->SendWakeWordDetected(wake_word);
|
||||
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
||||
#else
|
||||
// Play the pop up sound to indicate the wake word is detected
|
||||
// And wait 60ms to make sure the queue has been processed by audio task
|
||||
ResetDecoder();
|
||||
PlaySound(Lang::Sounds::P3_POPUP);
|
||||
vTaskDelay(pdMS_TO_TICKS(60));
|
||||
#endif
|
||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||
} else if (device_state_ == kDeviceStateSpeaking) {
|
||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||
@ -630,9 +655,7 @@ void Application::Start() {
|
||||
}
|
||||
});
|
||||
});
|
||||
#endif
|
||||
wake_word_detect_.StartDetection();
|
||||
#endif
|
||||
wake_word_->StartDetection();
|
||||
|
||||
// Wait for the new version check to finish
|
||||
xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
|
||||
@ -751,17 +774,14 @@ void Application::OnAudioOutput() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device_state_ == kDeviceStateListening) {
|
||||
audio_decode_queue_.clear();
|
||||
audio_decode_cv_.notify_all();
|
||||
return;
|
||||
}
|
||||
|
||||
auto packet = std::move(audio_decode_queue_.front());
|
||||
audio_decode_queue_.pop_front();
|
||||
lock.unlock();
|
||||
audio_decode_cv_.notify_all();
|
||||
|
||||
// Synchronize the sample rate and frame duration
|
||||
SetDecodeSampleRate(packet.sample_rate, packet.frame_duration);
|
||||
|
||||
busy_decoding_audio_ = true;
|
||||
background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable {
|
||||
busy_decoding_audio_ = false;
|
||||
@ -782,45 +802,48 @@ void Application::OnAudioOutput() {
|
||||
}
|
||||
codec->OutputData(pcm);
|
||||
#ifdef CONFIG_USE_SERVER_AEC
|
||||
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
||||
timestamp_queue_.push_back(packet.timestamp);
|
||||
last_output_timestamp_ = packet.timestamp;
|
||||
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
||||
timestamp_queue_.push_back(packet.timestamp);
|
||||
#endif
|
||||
last_output_time_ = std::chrono::steady_clock::now();
|
||||
});
|
||||
}
|
||||
|
||||
void Application::OnAudioInput() {
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
if (wake_word_detect_.IsDetectionRunning()) {
|
||||
if (wake_word_->IsDetectionRunning()) {
|
||||
std::vector<int16_t> data;
|
||||
int samples = wake_word_detect_.GetFeedSize();
|
||||
int samples = wake_word_->GetFeedSize();
|
||||
if (samples > 0) {
|
||||
ReadAudio(data, 16000, samples);
|
||||
wake_word_detect_.Feed(data);
|
||||
return;
|
||||
if (ReadAudio(data, 16000, samples)) {
|
||||
wake_word_->Feed(data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (audio_processor_->IsRunning()) {
|
||||
std::vector<int16_t> data;
|
||||
int samples = audio_processor_->GetFeedSize();
|
||||
if (samples > 0) {
|
||||
ReadAudio(data, 16000, samples);
|
||||
audio_processor_->Feed(data);
|
||||
return;
|
||||
if (ReadAudio(data, 16000, samples)) {
|
||||
audio_processor_->Feed(data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2));
|
||||
}
|
||||
|
||||
void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
|
||||
bool Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
|
||||
auto codec = Board::GetInstance().GetAudioCodec();
|
||||
if (!codec->input_enabled()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (codec->input_sample_rate() != sample_rate) {
|
||||
data.resize(samples * codec->input_sample_rate() / sample_rate);
|
||||
if (!codec->InputData(data)) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
if (codec->input_channels() == 2) {
|
||||
auto mic_channel = std::vector<int16_t>(data.size() / 2);
|
||||
@ -846,9 +869,10 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
|
||||
} else {
|
||||
data.resize(samples);
|
||||
if (!codec->InputData(data)) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Application::AbortSpeaking(AbortReason reason) {
|
||||
@ -884,17 +908,13 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
display->SetStatus(Lang::Strings::STANDBY);
|
||||
display->SetEmotion("neutral");
|
||||
audio_processor_->Stop();
|
||||
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
wake_word_detect_.StartDetection();
|
||||
#endif
|
||||
wake_word_->StartDetection();
|
||||
break;
|
||||
case kDeviceStateConnecting:
|
||||
display->SetStatus(Lang::Strings::CONNECTING);
|
||||
display->SetEmotion("neutral");
|
||||
display->SetChatMessage("system", "");
|
||||
timestamp_queue_.clear();
|
||||
last_output_timestamp_ = 0;
|
||||
break;
|
||||
case kDeviceStateListening:
|
||||
display->SetStatus(Lang::Strings::LISTENING);
|
||||
@ -909,14 +929,14 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
// Send the start listening command
|
||||
protocol_->SendStartListening(listening_mode_);
|
||||
if (previous_state == kDeviceStateSpeaking) {
|
||||
audio_decode_queue_.clear();
|
||||
audio_decode_cv_.notify_all();
|
||||
// FIXME: Wait for the speaker to empty the buffer
|
||||
vTaskDelay(pdMS_TO_TICKS(120));
|
||||
}
|
||||
opus_encoder_->ResetState();
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
wake_word_detect_.StopDetection();
|
||||
#endif
|
||||
audio_processor_->Start();
|
||||
wake_word_->StopDetection();
|
||||
}
|
||||
break;
|
||||
case kDeviceStateSpeaking:
|
||||
@ -924,8 +944,11 @@ void Application::SetDeviceState(DeviceState state) {
|
||||
|
||||
if (listening_mode_ != kListeningModeRealtime) {
|
||||
audio_processor_->Stop();
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
wake_word_detect_.StartDetection();
|
||||
// Only AFE wake word can be detected in speaking mode
|
||||
#if CONFIG_USE_AFE_WAKE_WORD
|
||||
wake_word_->StartDetection();
|
||||
#else
|
||||
wake_word_->StopDetection();
|
||||
#endif
|
||||
}
|
||||
ResetDecoder();
|
||||
|
||||
@ -21,12 +21,7 @@
|
||||
#include "ota.h"
|
||||
#include "background_task.h"
|
||||
#include "audio_processor.h"
|
||||
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT
|
||||
#include "wake_word_detect.h"
|
||||
#elif CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
#include "wake_word_no_afe.h"
|
||||
#endif
|
||||
#include "wake_word.h"
|
||||
|
||||
#define SCHEDULE_EVENT (1 << 0)
|
||||
#define SEND_AUDIO_EVENT (1 << 1)
|
||||
@ -83,14 +78,13 @@ public:
|
||||
void SendMcpMessage(const std::string& payload);
|
||||
void SetAecMode(AecMode mode);
|
||||
AecMode GetAecMode() const { return aec_mode_; }
|
||||
BackgroundTask* GetBackgroundTask() const { return background_task_; }
|
||||
|
||||
private:
|
||||
Application();
|
||||
~Application();
|
||||
|
||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
||||
WakeWordDetect wake_word_detect_;
|
||||
#endif
|
||||
std::unique_ptr<WakeWord> wake_word_;
|
||||
std::unique_ptr<AudioProcessor> audio_processor_;
|
||||
Ota ota_;
|
||||
std::mutex mutex_;
|
||||
@ -119,7 +113,6 @@ private:
|
||||
// 新增:用于维护音频包的timestamp队列
|
||||
std::list<uint32_t> timestamp_queue_;
|
||||
std::mutex timestamp_mutex_;
|
||||
std::atomic<uint32_t> last_output_timestamp_ = 0;
|
||||
|
||||
std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
|
||||
std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
|
||||
@ -131,7 +124,7 @@ private:
|
||||
void MainEventLoop();
|
||||
void OnAudioInput();
|
||||
void OnAudioOutput();
|
||||
void ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
|
||||
bool ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
|
||||
void ResetDecoder();
|
||||
void SetDecodeSampleRate(int sample_rate, int frame_duration);
|
||||
void CheckNewVersion();
|
||||
|
||||
BIN
main/assets/common/popup.p3
Normal file
BIN
main/assets/common/popup.p3
Normal file
Binary file not shown.
@ -3,7 +3,7 @@
|
||||
|
||||
#define PROCESSOR_RUNNING 0x01
|
||||
|
||||
static const char* TAG = "AfeAudioProcessor";
|
||||
#define TAG "AfeAudioProcessor"
|
||||
|
||||
AfeAudioProcessor::AfeAudioProcessor()
|
||||
: afe_data_(nullptr) {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#include "wake_word_detect.h"
|
||||
#include "afe_wake_word.h"
|
||||
#include "application.h"
|
||||
|
||||
#include <esp_log.h>
|
||||
@ -8,9 +8,9 @@
|
||||
|
||||
#define DETECTION_RUNNING_EVENT 1
|
||||
|
||||
static const char* TAG = "WakeWordDetect";
|
||||
#define TAG "AfeWakeWord"
|
||||
|
||||
WakeWordDetect::WakeWordDetect()
|
||||
AfeWakeWord::AfeWakeWord()
|
||||
: afe_data_(nullptr),
|
||||
wake_word_pcm_(),
|
||||
wake_word_opus_() {
|
||||
@ -18,7 +18,7 @@ WakeWordDetect::WakeWordDetect()
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
WakeWordDetect::~WakeWordDetect() {
|
||||
AfeWakeWord::~AfeWakeWord() {
|
||||
if (afe_data_ != nullptr) {
|
||||
afe_iface_->destroy(afe_data_);
|
||||
}
|
||||
@ -30,7 +30,7 @@ WakeWordDetect::~WakeWordDetect() {
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
void WakeWordDetect::Initialize(AudioCodec* codec) {
|
||||
void AfeWakeWord::Initialize(AudioCodec* codec) {
|
||||
codec_ = codec;
|
||||
int ref_num = codec_->input_reference() ? 1 : 0;
|
||||
|
||||
@ -67,46 +67,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
|
||||
afe_data_ = afe_iface_->create_from_config(afe_config);
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
auto this_ = (WakeWordDetect*)arg;
|
||||
auto this_ = (AfeWakeWord*)arg;
|
||||
this_->AudioDetectionTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_detection", 4096, this, 3, nullptr);
|
||||
}
|
||||
|
||||
void WakeWordDetect::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
|
||||
void AfeWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
|
||||
wake_word_detected_callback_ = callback;
|
||||
}
|
||||
|
||||
void WakeWordDetect::StartDetection() {
|
||||
void AfeWakeWord::StartDetection() {
|
||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||
}
|
||||
|
||||
void WakeWordDetect::StopDetection() {
|
||||
void AfeWakeWord::StopDetection() {
|
||||
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||
if (afe_data_ != nullptr) {
|
||||
afe_iface_->reset_buffer(afe_data_);
|
||||
}
|
||||
}
|
||||
|
||||
bool WakeWordDetect::IsDetectionRunning() {
|
||||
bool AfeWakeWord::IsDetectionRunning() {
|
||||
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
||||
}
|
||||
|
||||
void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
|
||||
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||
if (afe_data_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
afe_iface_->feed(afe_data_, data.data());
|
||||
}
|
||||
|
||||
size_t WakeWordDetect::GetFeedSize() {
|
||||
size_t AfeWakeWord::GetFeedSize() {
|
||||
if (afe_data_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
||||
}
|
||||
|
||||
void WakeWordDetect::AudioDetectionTask() {
|
||||
void AfeWakeWord::AudioDetectionTask() {
|
||||
auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
|
||||
auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
|
||||
ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
|
||||
@ -121,7 +121,7 @@ void WakeWordDetect::AudioDetectionTask() {
|
||||
}
|
||||
|
||||
// Store the wake word data for voice recognition, like who is speaking
|
||||
StoreWakeWordData((uint16_t*)res->data, res->data_size / sizeof(uint16_t));
|
||||
StoreWakeWordData(res->data, res->data_size / sizeof(int16_t));
|
||||
|
||||
if (res->wakeup_state == WAKENET_DETECTED) {
|
||||
StopDetection();
|
||||
@ -134,7 +134,7 @@ void WakeWordDetect::AudioDetectionTask() {
|
||||
}
|
||||
}
|
||||
|
||||
void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
||||
void AfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) {
|
||||
// store audio data to wake_word_pcm_
|
||||
wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
|
||||
// keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
|
||||
@ -143,13 +143,13 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
||||
}
|
||||
}
|
||||
|
||||
void WakeWordDetect::EncodeWakeWordData() {
|
||||
void AfeWakeWord::EncodeWakeWordData() {
|
||||
wake_word_opus_.clear();
|
||||
if (wake_word_encode_task_stack_ == nullptr) {
|
||||
wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
|
||||
}
|
||||
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
||||
auto this_ = (WakeWordDetect*)arg;
|
||||
auto this_ = (AfeWakeWord*)arg;
|
||||
{
|
||||
auto start_time = esp_timer_get_time();
|
||||
auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
|
||||
@ -176,7 +176,7 @@ void WakeWordDetect::EncodeWakeWordData() {
|
||||
}, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
||||
}
|
||||
|
||||
bool WakeWordDetect::GetWakeWordOpus(std::vector<uint8_t>& opus) {
|
||||
bool AfeWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
|
||||
std::unique_lock<std::mutex> lock(wake_word_mutex_);
|
||||
wake_word_cv_.wait(lock, [this]() {
|
||||
return !wake_word_opus_.empty();
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef WAKE_WORD_DETECT_H
|
||||
#define WAKE_WORD_DETECT_H
|
||||
#ifndef AFE_WAKE_WORD_H
|
||||
#define AFE_WAKE_WORD_H
|
||||
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/task.h>
|
||||
@ -16,11 +16,12 @@
|
||||
#include <condition_variable>
|
||||
|
||||
#include "audio_codec.h"
|
||||
#include "wake_word.h"
|
||||
|
||||
class WakeWordDetect {
|
||||
class AfeWakeWord : public WakeWord {
|
||||
public:
|
||||
WakeWordDetect();
|
||||
~WakeWordDetect();
|
||||
AfeWakeWord();
|
||||
~AfeWakeWord();
|
||||
|
||||
void Initialize(AudioCodec* codec);
|
||||
void Feed(const std::vector<int16_t>& data);
|
||||
@ -51,7 +52,7 @@ private:
|
||||
std::mutex wake_word_mutex_;
|
||||
std::condition_variable wake_word_cv_;
|
||||
|
||||
void StoreWakeWordData(uint16_t* data, size_t size);
|
||||
void StoreWakeWordData(const int16_t* data, size_t size);
|
||||
void AudioDetectionTask();
|
||||
};
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#include "wake_word_no_afe.h"
|
||||
#include "esp_wake_word.h"
|
||||
#include "application.h"
|
||||
|
||||
#include <esp_log.h>
|
||||
@ -8,13 +8,13 @@
|
||||
|
||||
#define DETECTION_RUNNING_EVENT 1
|
||||
|
||||
static const char* TAG = "WakeWordDetect";
|
||||
#define TAG "EspWakeWord"
|
||||
|
||||
WakeWordDetect::WakeWordDetect() {
|
||||
EspWakeWord::EspWakeWord() {
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
WakeWordDetect::~WakeWordDetect() {
|
||||
EspWakeWord::~EspWakeWord() {
|
||||
if (wakenet_data_ != nullptr) {
|
||||
wakenet_iface_->destroy(wakenet_data_);
|
||||
esp_srmodel_deinit(wakenet_model_);
|
||||
@ -23,13 +23,16 @@ WakeWordDetect::~WakeWordDetect() {
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
void WakeWordDetect::Initialize(AudioCodec* codec) {
|
||||
void EspWakeWord::Initialize(AudioCodec* codec) {
|
||||
codec_ = codec;
|
||||
|
||||
wakenet_model_ = esp_srmodel_init("model");
|
||||
|
||||
if(wakenet_model_->num > 1) {
|
||||
ESP_LOGW(TAG, "More than one model found, using the first one");
|
||||
} else if (wakenet_model_->num == 0) {
|
||||
ESP_LOGE(TAG, "No model found");
|
||||
return;
|
||||
}
|
||||
char *model_name = wakenet_model_->model_name[0];
|
||||
wakenet_iface_ = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
|
||||
@ -40,28 +43,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
|
||||
ESP_LOGI(TAG, "Wake word(%s),freq: %d, chunksize: %d", model_name, frequency, audio_chunksize);
|
||||
}
|
||||
|
||||
void WakeWordDetect::StartDetection() {
|
||||
// Registers the handler to be called with the detected wake word.
// A later call replaces any handler stored previously.
void EspWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
    wake_word_detected_callback_ = callback;
}
|
||||
|
||||
// Marks detection as running by setting DETECTION_RUNNING_EVENT in the
// event group (logged first so the state change is visible in the console).
void EspWakeWord::StartDetection() {
    ESP_LOGI(TAG, "Start wake word detection");
    xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
}
|
||||
|
||||
void WakeWordDetect::StopDetection() {
|
||||
// Marks detection as stopped by clearing DETECTION_RUNNING_EVENT in the
// event group (logged first so the state change is visible in the console).
void EspWakeWord::StopDetection() {
    ESP_LOGI(TAG, "Stop wake word detection");
    xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
}
|
||||
|
||||
bool WakeWordDetect::IsDetectionRunning() {
|
||||
bool EspWakeWord::IsDetectionRunning() {
|
||||
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
||||
}
|
||||
|
||||
void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
|
||||
void EspWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||
int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
|
||||
if (res > 0) {
|
||||
ESP_LOGI(TAG, "Wake word detected");
|
||||
auto& app = Application::GetInstance();
|
||||
app.ToggleChatState();
|
||||
StopDetection();
|
||||
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
|
||||
|
||||
if (wake_word_detected_callback_) {
|
||||
wake_word_detected_callback_(last_detected_wake_word_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t WakeWordDetect::GetFeedSize() {
|
||||
|
||||
size_t EspWakeWord::GetFeedSize() {
|
||||
if (wakenet_data_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
return wakenet_iface_->get_samp_chunksize(wakenet_data_) * codec_->input_channels();
|
||||
}
|
||||
|
||||
// Intentionally empty: this implementation keeps no wake word audio to encode
// (GetWakeWordOpus() always reports that nothing is available).
void EspWakeWord::EncodeWakeWordData() {
}
|
||||
|
||||
// Always returns false: no encoded wake word audio is produced by this
// implementation. `opus` is left untouched.
bool EspWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
    return false;
}
|
||||
@ -1,13 +1,13 @@
|
||||
#ifndef WAKE_WORD_DETECT_H
|
||||
#define WAKE_WORD_DETECT_H
|
||||
#ifndef ESP_WAKE_WORD_H
|
||||
#define ESP_WAKE_WORD_H
|
||||
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/task.h>
|
||||
#include <freertos/event_groups.h>
|
||||
|
||||
#include "model_path.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include <esp_wn_iface.h>
|
||||
#include <esp_wn_models.h>
|
||||
#include <model_path.h>
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
@ -17,19 +17,23 @@
|
||||
#include <condition_variable>
|
||||
|
||||
#include "audio_codec.h"
|
||||
#include <model_path.h>
|
||||
#include "wake_word.h"
|
||||
|
||||
class WakeWordDetect {
|
||||
class EspWakeWord : public WakeWord {
|
||||
public:
|
||||
WakeWordDetect();
|
||||
~WakeWordDetect();
|
||||
EspWakeWord();
|
||||
~EspWakeWord();
|
||||
|
||||
void Initialize(AudioCodec* codec);
|
||||
void Feed(const std::vector<int16_t>& data);
|
||||
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
|
||||
void StartDetection();
|
||||
void StopDetection();
|
||||
bool IsDetectionRunning();
|
||||
size_t GetFeedSize();
|
||||
void EncodeWakeWordData();
|
||||
bool GetWakeWordOpus(std::vector<uint8_t>& opus);
|
||||
const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
|
||||
|
||||
private:
|
||||
esp_wn_iface_t *wakenet_iface_ = nullptr;
|
||||
@ -37,6 +41,9 @@ private:
|
||||
srmodel_list_t *wakenet_model_ = nullptr;
|
||||
EventGroupHandle_t event_group_;
|
||||
AudioCodec* codec_ = nullptr;
|
||||
|
||||
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
||||
std::string last_detected_wake_word_;
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -1,13 +1,13 @@
|
||||
#include "dummy_audio_processor.h"
|
||||
#include "no_audio_processor.h"
|
||||
#include <esp_log.h>
|
||||
|
||||
#define TAG "DummyAudioProcessor"
|
||||
#define TAG "NoAudioProcessor"
|
||||
|
||||
void DummyAudioProcessor::Initialize(AudioCodec* codec) {
|
||||
// Remembers the codec; GetFeedSize() reads its input sample rate later.
// The pointer is stored as-is (no ownership is taken here).
void NoAudioProcessor::Initialize(AudioCodec* codec) {
    codec_ = codec;
}
|
||||
|
||||
void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||
void NoAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||
if (!is_running_ || !output_callback_) {
|
||||
return;
|
||||
}
|
||||
@ -15,27 +15,27 @@ void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||
output_callback_(std::vector<int16_t>(data));
|
||||
}
|
||||
|
||||
void DummyAudioProcessor::Start() {
|
||||
// Enables pass-through: Feed() only forwards audio while is_running_ is true.
void NoAudioProcessor::Start() {
    is_running_ = true;
}
|
||||
|
||||
void DummyAudioProcessor::Stop() {
|
||||
// Disables pass-through: Feed() returns early while is_running_ is false.
void NoAudioProcessor::Stop() {
    is_running_ = false;
}
|
||||
|
||||
bool DummyAudioProcessor::IsRunning() {
|
||||
bool NoAudioProcessor::IsRunning() {
|
||||
return is_running_;
|
||||
}
|
||||
|
||||
void DummyAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
|
||||
// Registers the sink that Feed() hands audio data to.
// A later call replaces any sink stored previously.
void NoAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
    output_callback_ = callback;
}
|
||||
|
||||
void DummyAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
|
||||
// Stores the voice-activity callback. Nothing in the code visible here
// invokes it; it is only recorded.
void NoAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
    vad_state_change_callback_ = callback;
}
|
||||
|
||||
size_t DummyAudioProcessor::GetFeedSize() {
|
||||
size_t NoAudioProcessor::GetFeedSize() {
|
||||
if (!codec_) {
|
||||
return 0;
|
||||
}
|
||||
@ -43,7 +43,7 @@ size_t DummyAudioProcessor::GetFeedSize() {
|
||||
return 30 * codec_->input_sample_rate() / 1000;
|
||||
}
|
||||
|
||||
void DummyAudioProcessor::EnableDeviceAec(bool enable) {
|
||||
void NoAudioProcessor::EnableDeviceAec(bool enable) {
|
||||
if (enable) {
|
||||
ESP_LOGE(TAG, "Device AEC is not supported");
|
||||
}
|
||||
@ -7,10 +7,10 @@
|
||||
#include "audio_processor.h"
|
||||
#include "audio_codec.h"
|
||||
|
||||
class DummyAudioProcessor : public AudioProcessor {
|
||||
class NoAudioProcessor : public AudioProcessor {
|
||||
public:
|
||||
DummyAudioProcessor() = default;
|
||||
~DummyAudioProcessor() = default;
|
||||
NoAudioProcessor() = default;
|
||||
~NoAudioProcessor() = default;
|
||||
|
||||
void Initialize(AudioCodec* codec) override;
|
||||
void Feed(const std::vector<int16_t>& data) override;
|
||||
45
main/audio_processing/no_wake_word.cc
Normal file
45
main/audio_processing/no_wake_word.cc
Normal file
@ -0,0 +1,45 @@
|
||||
#include "no_wake_word.h"
|
||||
#include <esp_log.h>
|
||||
|
||||
#define TAG "NoWakeWord"
|
||||
|
||||
// Records the codec pointer for interface parity with the real detectors;
// no other NoWakeWord method reads it.
void NoWakeWord::Initialize(AudioCodec* codec) {
    codec_ = codec;
}
|
||||
|
||||
// No-op: incoming audio is discarded because wake word detection is disabled.
void NoWakeWord::Feed(const std::vector<int16_t>& data) {
}
|
||||
|
||||
// No-op: the callback is not stored, since this implementation never detects
// a wake word and therefore never has a reason to call it.
void NoWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
}
|
||||
|
||||
// No-op: there is no detector to start.
void NoWakeWord::StartDetection() {
}
|
||||
|
||||
// No-op: there is no detector to stop.
void NoWakeWord::StopDetection() {
}
|
||||
|
||||
// Detection never runs in this implementation, so this is always false.
bool NoWakeWord::IsDetectionRunning() {
    return false;
}
|
||||
|
||||
// Always 0: Feed() ignores its input, so no particular chunk size is required.
size_t NoWakeWord::GetFeedSize() {
    return 0;
}
|
||||
|
||||
// No-op: no wake word audio is ever captured, so there is nothing to encode.
void NoWakeWord::EncodeWakeWordData() {
}
|
||||
|
||||
// Empties `opus` and returns false to signal that no encoded wake word audio
// is available from this implementation.
bool NoWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
    opus.clear();
    return false;
}
|
||||
|
||||
// Returns the last detected wake word, which is always the empty string for
// this implementation because it never detects anything.
//
// The function returns a reference, so the string it refers to must outlive
// the call. Returning the literal "" would bind the reference to a temporary
// std::string destroyed at the end of the return statement, leaving callers
// with a dangling reference. A function-local static gives the empty string
// static storage duration instead.
const std::string& NoWakeWord::GetLastDetectedWakeWord() const {
    static const std::string kNoWakeWord;
    return kNoWakeWord;
}
|
||||
31
main/audio_processing/no_wake_word.h
Normal file
31
main/audio_processing/no_wake_word.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef NO_WAKE_WORD_H
|
||||
#define NO_WAKE_WORD_H
|
||||
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "wake_word.h"
|
||||
#include "audio_codec.h"
|
||||
|
||||
class NoWakeWord : public WakeWord {
|
||||
public:
|
||||
NoWakeWord() = default;
|
||||
~NoWakeWord() = default;
|
||||
|
||||
void Initialize(AudioCodec* codec) override;
|
||||
void Feed(const std::vector<int16_t>& data) override;
|
||||
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) override;
|
||||
void StartDetection() override;
|
||||
void StopDetection() override;
|
||||
bool IsDetectionRunning() override;
|
||||
size_t GetFeedSize() override;
|
||||
void EncodeWakeWordData() override;
|
||||
bool GetWakeWordOpus(std::vector<uint8_t>& opus) override;
|
||||
const std::string& GetLastDetectedWakeWord() const override;
|
||||
|
||||
private:
|
||||
AudioCodec* codec_ = nullptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
26
main/audio_processing/wake_word.h
Normal file
26
main/audio_processing/wake_word.h
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef WAKE_WORD_H
|
||||
#define WAKE_WORD_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include "audio_codec.h"
|
||||
|
||||
class WakeWord {
|
||||
public:
|
||||
virtual ~WakeWord() = default;
|
||||
|
||||
virtual void Initialize(AudioCodec* codec) = 0;
|
||||
virtual void Feed(const std::vector<int16_t>& data) = 0;
|
||||
virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;
|
||||
virtual void StartDetection() = 0;
|
||||
virtual void StopDetection() = 0;
|
||||
virtual bool IsDetectionRunning() = 0;
|
||||
virtual size_t GetFeedSize() = 0;
|
||||
virtual void EncodeWakeWordData() = 0;
|
||||
virtual bool GetWakeWordOpus(std::vector<uint8_t>& opus) = 0;
|
||||
virtual const std::string& GetLastDetectedWakeWord() const = 0;
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -30,7 +30,8 @@
|
||||
"CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y",
|
||||
"CONFIG_NEWLIB_NANO_FORMAT=y",
|
||||
"CONFIG_MMAP_FILE_NAME_LENGTH=25",
|
||||
"CONFIG_ESP_CONSOLE_NONE=y"
|
||||
"CONFIG_ESP_CONSOLE_NONE=y",
|
||||
"CONFIG_IOT_PROTOCOL_XIAOZHI=y"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -70,7 +70,7 @@ private:
|
||||
}
|
||||
|
||||
void InitializePowerSaveTimer() {
|
||||
power_save_timer_ = new PowerSaveTimer(160, 60);
|
||||
power_save_timer_ = new PowerSaveTimer(240, 60);
|
||||
power_save_timer_->OnEnterSleepMode([this]() {
|
||||
ESP_LOGI(TAG, "Enabling sleep mode");
|
||||
auto display = GetDisplay();
|
||||
|
||||
@ -5,7 +5,9 @@
|
||||
"name": "lichuang-c3-dev",
|
||||
"sdkconfig_append": [
|
||||
"CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y",
|
||||
"CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\""
|
||||
"CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\"",
|
||||
"CONFIG_ESP_WIFI_ENTERPRISE_SUPPORT=n",
|
||||
"CONFIG_LWIP_IPV6=n"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -5,7 +5,8 @@
|
||||
"name": "magiclick-c3-v2",
|
||||
"sdkconfig_append": [
|
||||
"CONFIG_PM_ENABLE=y",
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||
"CONFIG_USE_ESP_WAKE_WORD=n"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -5,7 +5,8 @@
|
||||
"name": "magiclick-c3",
|
||||
"sdkconfig_append": [
|
||||
"CONFIG_PM_ENABLE=y",
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||
"CONFIG_USE_ESP_WAKE_WORD=n"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -5,7 +5,8 @@
|
||||
"name": "xmini-c3",
|
||||
"sdkconfig_append": [
|
||||
"CONFIG_PM_ENABLE=y",
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||
"CONFIG_USE_ESP_WAKE_WORD=y"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@ -30,10 +30,10 @@ private:
|
||||
Display* display_ = nullptr;
|
||||
Button boot_button_;
|
||||
bool press_to_talk_enabled_ = false;
|
||||
PowerSaveTimer* power_save_timer_;
|
||||
PowerSaveTimer* power_save_timer_ = nullptr;
|
||||
|
||||
void InitializePowerSaveTimer() {
|
||||
power_save_timer_ = new PowerSaveTimer(160, 60);
|
||||
power_save_timer_ = new PowerSaveTimer(160, 600);
|
||||
power_save_timer_->OnEnterSleepMode([this]() {
|
||||
ESP_LOGI(TAG, "Enabling sleep mode");
|
||||
auto display = GetDisplay();
|
||||
@ -130,7 +130,9 @@ private:
|
||||
}
|
||||
});
|
||||
boot_button_.OnPressDown([this]() {
|
||||
power_save_timer_->WakeUp();
|
||||
if (power_save_timer_) {
|
||||
power_save_timer_->WakeUp();
|
||||
}
|
||||
if (press_to_talk_enabled_) {
|
||||
Application::GetInstance().StartListening();
|
||||
}
|
||||
|
||||
@ -227,6 +227,8 @@ bool MqttProtocol::OpenAudioChannel() {
|
||||
auto nonce = (uint8_t*)data.data();
|
||||
auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
|
||||
AudioStreamPacket packet;
|
||||
packet.sample_rate = server_sample_rate_;
|
||||
packet.frame_duration = server_frame_duration_;
|
||||
packet.timestamp = timestamp;
|
||||
packet.payload.resize(decrypted_size);
|
||||
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet.payload.data());
|
||||
|
||||
@ -8,6 +8,8 @@
|
||||
#include <vector>
|
||||
|
||||
// A single audio packet exchanged with the server: stream parameters plus the
// raw payload bytes. All numeric fields default to zero and the payload to
// empty, so a default-constructed packet is well defined.
struct AudioStreamPacket {
    int sample_rate{0};            // sample rate of the stream this packet belongs to
    int frame_duration{0};         // frame duration of the stream (units not shown here — TODO confirm)
    uint32_t timestamp{0};         // packet timestamp; 0 when the transport carries none
    std::vector<uint8_t> payload;  // packet payload bytes
};
|
||||
|
||||
@ -124,6 +124,8 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
||||
bp2->payload_size = ntohl(bp2->payload_size);
|
||||
auto payload = (uint8_t*)bp2->payload;
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = bp2->timestamp,
|
||||
.payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
|
||||
});
|
||||
@ -133,11 +135,15 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
||||
bp3->payload_size = ntohs(bp3->payload_size);
|
||||
auto payload = (uint8_t*)bp3->payload;
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = 0,
|
||||
.payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
|
||||
});
|
||||
} else {
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = 0,
|
||||
.payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
|
||||
});
|
||||
|
||||
@ -1,2 +1,3 @@
|
||||
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||
CONFIG_SR_WN_WN9S_NIHAOXIAOZHI=y
|
||||
|
||||
Loading…
Reference in New Issue
Block a user