Add wake word to xmini-c3 (#730)
* esp-hi: MCP protocol is not ready yet * Add wake word to xmini-c3
This commit is contained in:
parent
6cb025859f
commit
ae57131c15
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,5 +10,6 @@ dependencies.lock
|
|||||||
.env
|
.env
|
||||||
releases/
|
releases/
|
||||||
main/assets/lang_config.h
|
main/assets/lang_config.h
|
||||||
|
main/mmap_generate_emoji.h
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.cache
|
.cache
|
||||||
@ -194,13 +194,14 @@ list(APPEND SOURCES ${BOARD_SOURCES})
|
|||||||
if(CONFIG_USE_AUDIO_PROCESSOR)
|
if(CONFIG_USE_AUDIO_PROCESSOR)
|
||||||
list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
|
list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
|
||||||
else()
|
else()
|
||||||
list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc")
|
list(APPEND SOURCES "audio_processing/no_audio_processor.cc")
|
||||||
endif()
|
endif()
|
||||||
if(CONFIG_USE_WAKE_WORD_DETECT)
|
if(CONFIG_USE_AFE_WAKE_WORD)
|
||||||
list(APPEND SOURCES "audio_processing/wake_word_detect.cc")
|
list(APPEND SOURCES "audio_processing/afe_wake_word.cc")
|
||||||
endif()
|
elseif(CONFIG_USE_ESP_WAKE_WORD)
|
||||||
if(CONFIG_USE_WAKE_WORD_DETECT_NO_AFE)
|
list(APPEND SOURCES "audio_processing/esp_wake_word.cc")
|
||||||
list(APPEND SOURCES "audio_processing/wake_word_no_afe.cc")
|
else()
|
||||||
|
list(APPEND SOURCES "audio_processing/no_wake_word.cc")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# 根据Kconfig选择语言目录
|
# 根据Kconfig选择语言目录
|
||||||
|
|||||||
@ -30,152 +30,226 @@ choice BOARD_TYPE
|
|||||||
Board type. 开发板类型
|
Board type. 开发板类型
|
||||||
config BOARD_TYPE_BREAD_COMPACT_WIFI
|
config BOARD_TYPE_BREAD_COMPACT_WIFI
|
||||||
bool "面包板新版接线(WiFi)"
|
bool "面包板新版接线(WiFi)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD
|
config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD
|
||||||
bool "面包板新版接线(WiFi)+ LCD"
|
bool "面包板新版接线(WiFi)+ LCD"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_BREAD_COMPACT_ML307
|
config BOARD_TYPE_BREAD_COMPACT_ML307
|
||||||
bool "面包板新版接线(ML307 AT)"
|
bool "面包板新版接线(ML307 AT)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_BREAD_COMPACT_ESP32
|
config BOARD_TYPE_BREAD_COMPACT_ESP32
|
||||||
bool "面包板(WiFi) ESP32 DevKit"
|
bool "面包板(WiFi) ESP32 DevKit"
|
||||||
|
depends on IDF_TARGET_ESP32
|
||||||
config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD
|
config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD
|
||||||
bool "面包板(WiFi+ LCD) ESP32 DevKit"
|
bool "面包板(WiFi+ LCD) ESP32 DevKit"
|
||||||
|
depends on IDF_TARGET_ESP32
|
||||||
config BOARD_TYPE_XMINI_C3
|
config BOARD_TYPE_XMINI_C3
|
||||||
bool "虾哥 Mini C3"
|
bool "虾哥 Mini C3"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_ESP32S3_KORVO2_V3
|
config BOARD_TYPE_ESP32S3_KORVO2_V3
|
||||||
bool "ESP32S3_KORVO2_V3开发板"
|
bool "ESP32S3_KORVO2_V3开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_SPARKBOT
|
config BOARD_TYPE_ESP_SPARKBOT
|
||||||
bool "ESP-SparkBot开发板"
|
bool "ESP-SparkBot开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_SPOT_S3
|
config BOARD_TYPE_ESP_SPOT_S3
|
||||||
bool "ESP-Spot-S3"
|
bool "ESP-Spot-S3"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_HI
|
config BOARD_TYPE_ESP_HI
|
||||||
bool "ESP-HI"
|
bool "ESP-HI"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_ESP_BOX_3
|
config BOARD_TYPE_ESP_BOX_3
|
||||||
bool "ESP BOX 3"
|
bool "ESP BOX 3"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_BOX
|
config BOARD_TYPE_ESP_BOX
|
||||||
bool "ESP BOX"
|
bool "ESP BOX"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_BOX_LITE
|
config BOARD_TYPE_ESP_BOX_LITE
|
||||||
bool "ESP BOX Lite"
|
bool "ESP BOX Lite"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_KEVIN_BOX_1
|
config BOARD_TYPE_KEVIN_BOX_1
|
||||||
bool "Kevin Box 1"
|
bool "Kevin Box 1"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_KEVIN_BOX_2
|
config BOARD_TYPE_KEVIN_BOX_2
|
||||||
bool "Kevin Box 2"
|
bool "Kevin Box 2"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_KEVIN_C3
|
config BOARD_TYPE_KEVIN_C3
|
||||||
bool "Kevin C3"
|
bool "Kevin C3"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_KEVIN_SP_V3_DEV
|
config BOARD_TYPE_KEVIN_SP_V3_DEV
|
||||||
bool "Kevin SP V3开发板"
|
bool "Kevin SP V3开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_KEVIN_SP_V4_DEV
|
config BOARD_TYPE_KEVIN_SP_V4_DEV
|
||||||
bool "Kevin SP V4开发板"
|
bool "Kevin SP V4开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32_CGC
|
config BOARD_TYPE_ESP32_CGC
|
||||||
bool "ESP32 CGC"
|
bool "ESP32 CGC"
|
||||||
|
depends on IDF_TARGET_ESP32
|
||||||
config BOARD_TYPE_KEVIN_YUYING_313LCD
|
config BOARD_TYPE_KEVIN_YUYING_313LCD
|
||||||
bool "鱼鹰科技3.13LCD开发板"
|
bool "鱼鹰科技3.13LCD开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LICHUANG_DEV
|
config BOARD_TYPE_LICHUANG_DEV
|
||||||
bool "立创·实战派ESP32-S3开发板"
|
bool "立创·实战派ESP32-S3开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LICHUANG_C3_DEV
|
config BOARD_TYPE_LICHUANG_C3_DEV
|
||||||
bool "立创·实战派ESP32-C3开发板"
|
bool "立创·实战派ESP32-C3开发板"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_DF_K10
|
config BOARD_TYPE_DF_K10
|
||||||
bool "DFRobot 行空板 k10"
|
bool "DFRobot 行空板 k10"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_DF_S3_AI_CAM
|
config BOARD_TYPE_DF_S3_AI_CAM
|
||||||
bool "DFRobot ESP32-S3 AI智能摄像头模块"
|
bool "DFRobot ESP32-S3 AI智能摄像头模块"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MAGICLICK_2P4
|
config BOARD_TYPE_MAGICLICK_2P4
|
||||||
bool "神奇按钮 Magiclick_2.4"
|
bool "神奇按钮 Magiclick_2.4"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MAGICLICK_2P5
|
config BOARD_TYPE_MAGICLICK_2P5
|
||||||
bool "神奇按钮 Magiclick_2.5"
|
bool "神奇按钮 Magiclick_2.5"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MAGICLICK_C3
|
config BOARD_TYPE_MAGICLICK_C3
|
||||||
bool "神奇按钮 Magiclick_C3"
|
bool "神奇按钮 Magiclick_C3"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_MAGICLICK_C3_V2
|
config BOARD_TYPE_MAGICLICK_C3_V2
|
||||||
bool "神奇按钮 Magiclick_C3_v2"
|
bool "神奇按钮 Magiclick_C3_v2"
|
||||||
|
depends on IDF_TARGET_ESP32C3
|
||||||
config BOARD_TYPE_M5STACK_CORE_S3
|
config BOARD_TYPE_M5STACK_CORE_S3
|
||||||
bool "M5Stack CoreS3"
|
bool "M5Stack CoreS3"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_M5STACK_CORE_TAB5
|
config BOARD_TYPE_M5STACK_CORE_TAB5
|
||||||
bool "M5Stack Tab5"
|
bool "M5Stack Tab5"
|
||||||
|
depends on IDF_TARGET_ESP32P4
|
||||||
config BOARD_TYPE_ATOMS3_ECHO_BASE
|
config BOARD_TYPE_ATOMS3_ECHO_BASE
|
||||||
bool "AtomS3 + Echo Base"
|
bool "AtomS3 + Echo Base"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATOMS3R_ECHO_BASE
|
config BOARD_TYPE_ATOMS3R_ECHO_BASE
|
||||||
bool "AtomS3R + Echo Base"
|
bool "AtomS3R + Echo Base"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATOMS3R_CAM_M12_ECHO_BASE
|
config BOARD_TYPE_ATOMS3R_CAM_M12_ECHO_BASE
|
||||||
bool "AtomS3R CAM/M12 + Echo Base"
|
bool "AtomS3R CAM/M12 + Echo Base"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATOMMATRIX_ECHO_BASE
|
config BOARD_TYPE_ATOMMATRIX_ECHO_BASE
|
||||||
bool "AtomMatrix + Echo Base"
|
bool "AtomMatrix + Echo Base"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_8
|
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_8
|
||||||
bool "Waveshare ESP32-S3-Touch-AMOLED-1.8"
|
bool "Waveshare ESP32-S3-Touch-AMOLED-1.8"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_75
|
config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_75
|
||||||
bool "Waveshare ESP32-S3-Touch-AMOLED-1.75"
|
bool "Waveshare ESP32-S3-Touch-AMOLED-1.75"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85C
|
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85C
|
||||||
bool "Waveshare ESP32-S3-Touch-LCD-1.85C"
|
bool "Waveshare ESP32-S3-Touch-LCD-1.85C"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85
|
config BOARD_TYPE_ESP32S3_Touch_LCD_1_85
|
||||||
bool "Waveshare ESP32-S3-Touch-LCD-1.85"
|
bool "Waveshare ESP32-S3-Touch-LCD-1.85"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_LCD_1_46
|
config BOARD_TYPE_ESP32S3_Touch_LCD_1_46
|
||||||
bool "Waveshare ESP32-S3-Touch-LCD-1.46"
|
bool "Waveshare ESP32-S3-Touch-LCD-1.46"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Touch_LCD_3_5
|
config BOARD_TYPE_ESP32S3_Touch_LCD_3_5
|
||||||
bool "Waveshare ESP32-S3-Touch-LCD-3.5"
|
bool "Waveshare ESP32-S3-Touch-LCD-3.5"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32P4_NANO
|
config BOARD_TYPE_ESP32P4_NANO
|
||||||
bool "Waveshare ESP32-P4-NANO"
|
bool "Waveshare ESP32-P4-NANO"
|
||||||
|
depends on IDF_TARGET_ESP32P4
|
||||||
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_4B
|
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_4B
|
||||||
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4B"
|
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4B"
|
||||||
|
depends on IDF_TARGET_ESP32P4
|
||||||
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC
|
config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC
|
||||||
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-3.4C or ESP32-P4-WIFI6-Touch-LCD-4C"
|
bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-3.4C or ESP32-P4-WIFI6-Touch-LCD-4C"
|
||||||
|
depends on IDF_TARGET_ESP32P4
|
||||||
config BOARD_TYPE_TUDOUZI
|
config BOARD_TYPE_TUDOUZI
|
||||||
bool "土豆子"
|
bool "土豆子"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LILYGO_T_CIRCLE_S3
|
config BOARD_TYPE_LILYGO_T_CIRCLE_S3
|
||||||
bool "LILYGO T-Circle-S3"
|
bool "LILYGO T-Circle-S3"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_0_V1_1
|
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_0_V1_1
|
||||||
bool "LILYGO T-CameraPlus-S3_V1_0_V1_1"
|
bool "LILYGO T-CameraPlus-S3_V1_0_V1_1"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_2
|
config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_2
|
||||||
bool "LILYGO T-CameraPlus-S3_V1_2"
|
bool "LILYGO T-CameraPlus-S3_V1_2"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA
|
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA
|
||||||
bool "LILYGO T-Display-S3-Pro-MVSRLora"
|
bool "LILYGO T-Display-S3-Pro-MVSRLora"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA_NO_BATTERY
|
config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA_NO_BATTERY
|
||||||
bool "LILYGO T-Display-S3-Pro-MVSRLora_No_Battery"
|
bool "LILYGO T-Display-S3-Pro-MVSRLora_No_Battery"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MOVECALL_MOJI_ESP32S3
|
config BOARD_TYPE_MOVECALL_MOJI_ESP32S3
|
||||||
bool "Movecall Moji 小智AI衍生版"
|
bool "Movecall Moji 小智AI衍生版"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MOVECALL_CUICAN_ESP32S3
|
config BOARD_TYPE_MOVECALL_CUICAN_ESP32S3
|
||||||
bool "Movecall CuiCan 璀璨·AI吊坠"
|
bool "Movecall CuiCan 璀璨·AI吊坠"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATK_DNESP32S3
|
config BOARD_TYPE_ATK_DNESP32S3
|
||||||
bool "正点原子DNESP32S3开发板"
|
bool "正点原子DNESP32S3开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATK_DNESP32S3_BOX
|
config BOARD_TYPE_ATK_DNESP32S3_BOX
|
||||||
bool "正点原子DNESP32S3-BOX"
|
bool "正点原子DNESP32S3-BOX"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATK_DNESP32S3_BOX0
|
config BOARD_TYPE_ATK_DNESP32S3_BOX0
|
||||||
bool "正点原子DNESP32S3-BOX0"
|
bool "正点原子DNESP32S3-BOX0"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATK_DNESP32S3M_WIFI
|
config BOARD_TYPE_ATK_DNESP32S3M_WIFI
|
||||||
bool "正点原子DNESP32S3M-WIFI"
|
bool "正点原子DNESP32S3M-WIFI"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ATK_DNESP32S3M_4G
|
config BOARD_TYPE_ATK_DNESP32S3M_4G
|
||||||
bool "正点原子DNESP32S3M-4G"
|
bool "正点原子DNESP32S3M-4G"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_DU_CHATX
|
config BOARD_TYPE_DU_CHATX
|
||||||
bool "嘟嘟开发板CHATX(wifi)"
|
bool "嘟嘟开发板CHATX(wifi)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32S3_Taiji_Pi
|
config BOARD_TYPE_ESP32S3_Taiji_Pi
|
||||||
bool "太极小派esp32s3"
|
bool "太极小派esp32s3"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_WIFI
|
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_WIFI
|
||||||
bool "无名科技星智0.85(WIFI)"
|
bool "无名科技星智0.85(WIFI)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_ML307
|
config BOARD_TYPE_XINGZHI_Cube_0_85TFT_ML307
|
||||||
bool "无名科技星智0.85(ML307)"
|
bool "无名科技星智0.85(ML307)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_WIFI
|
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_WIFI
|
||||||
bool "无名科技星智0.96(WIFI)"
|
bool "无名科技星智0.96(WIFI)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_ML307
|
config BOARD_TYPE_XINGZHI_Cube_0_96OLED_ML307
|
||||||
bool "无名科技星智0.96(ML307)"
|
bool "无名科技星智0.96(ML307)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_WIFI
|
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_WIFI
|
||||||
bool "无名科技星智1.54(WIFI)"
|
bool "无名科技星智1.54(WIFI)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_ML307
|
config BOARD_TYPE_XINGZHI_Cube_1_54TFT_ML307
|
||||||
bool "无名科技星智1.54(ML307)"
|
bool "无名科技星智1.54(ML307)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_SENSECAP_WATCHER
|
config BOARD_TYPE_SENSECAP_WATCHER
|
||||||
bool "SenseCAP Watcher"
|
bool "SenseCAP Watcher"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_DOIT_S3_AIBOX
|
config BOARD_TYPE_DOIT_S3_AIBOX
|
||||||
bool "四博智联AI陪伴盒子"
|
bool "四博智联AI陪伴盒子"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MIXGO_NOVA
|
config BOARD_TYPE_MIXGO_NOVA
|
||||||
bool "元控·青春"
|
bool "元控·青春"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_GENJUTECH_S3_1_54TFT
|
config BOARD_TYPE_GENJUTECH_S3_1_54TFT
|
||||||
bool "亘具科技1.54(s3)"
|
bool "亘具科技1.54(s3)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP_S3_LCD_EV_Board
|
config BOARD_TYPE_ESP_S3_LCD_EV_Board
|
||||||
bool "乐鑫ESP S3 LCD EV Board开发板"
|
bool "乐鑫ESP S3 LCD EV Board开发板"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ZHENGCHEN_1_54TFT_WIFI
|
config BOARD_TYPE_ZHENGCHEN_1_54TFT_WIFI
|
||||||
bool "征辰科技1.54(WIFI)"
|
bool "征辰科技1.54(WIFI)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ZHENGCHEN_1_54TFT_ML307
|
config BOARD_TYPE_ZHENGCHEN_1_54TFT_ML307
|
||||||
bool "征辰科技1.54(ML307)"
|
bool "征辰科技1.54(ML307)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_MINSI_K08_DUAL
|
config BOARD_TYPE_MINSI_K08_DUAL
|
||||||
bool "敏思科技K08(DUAL)"
|
bool "敏思科技K08(DUAL)"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32_S3_1_54_MUMA
|
config BOARD_TYPE_ESP32_S3_1_54_MUMA
|
||||||
bool "Spotpear ESP32-S3-1.54-MUMA"
|
bool "Spotpear ESP32-S3-1.54-MUMA"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
config BOARD_TYPE_ESP32_S3_1_28_BOX
|
config BOARD_TYPE_ESP32_S3_1_28_BOX
|
||||||
bool "Spotpear ESP32-S3-1.28-BOX"
|
bool "Spotpear ESP32-S3-1.28-BOX"
|
||||||
|
depends on IDF_TARGET_ESP32S3
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
choice ESP_S3_LCD_EV_Board_Version_TYPE
|
choice ESP_S3_LCD_EV_Board_Version_TYPE
|
||||||
@ -270,24 +344,26 @@ config USE_WECHAT_MESSAGE_STYLE
|
|||||||
help
|
help
|
||||||
使用微信聊天界面风格
|
使用微信聊天界面风格
|
||||||
|
|
||||||
config USE_WAKE_WORD_DETECT_NO_AFE
|
config USE_ESP_WAKE_WORD
|
||||||
bool "Enable Wake Word Detection (without AFE)"
|
bool "Enable Wake Word Detection (without AFE)"
|
||||||
default y
|
default y
|
||||||
depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5
|
depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5
|
||||||
|
|
||||||
config USE_WAKE_WORD_DETECT
|
|
||||||
bool "Enable Wake Word Detection"
|
|
||||||
default y
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
|
|
||||||
help
|
help
|
||||||
需要 ESP32 S3 与 AFE 支持
|
支持 ESP32 C3 与 ESP32 C5
|
||||||
|
|
||||||
|
config USE_AFE_WAKE_WORD
|
||||||
|
bool "Enable Wake Word Detection (AFE)"
|
||||||
|
default n
|
||||||
|
depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
|
||||||
|
help
|
||||||
|
需要 ESP32 S3 与 PSRAM 支持
|
||||||
|
|
||||||
config USE_AUDIO_PROCESSOR
|
config USE_AUDIO_PROCESSOR
|
||||||
bool "Enable Audio Noise Reduction"
|
bool "Enable Audio Noise Reduction"
|
||||||
default y
|
default y
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
|
depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
|
||||||
help
|
help
|
||||||
需要 ESP32 S3 与 AFE 支持
|
需要 ESP32 S3 与 PSRAM 支持
|
||||||
|
|
||||||
config USE_DEVICE_AEC
|
config USE_DEVICE_AEC
|
||||||
bool "Enable Device-Side AEC"
|
bool "Enable Device-Side AEC"
|
||||||
@ -297,7 +373,7 @@ config USE_DEVICE_AEC
|
|||||||
因为性能不够,不建议和微信聊天界面风格同时开启
|
因为性能不够,不建议和微信聊天界面风格同时开启
|
||||||
|
|
||||||
config USE_SERVER_AEC
|
config USE_SERVER_AEC
|
||||||
bool "Enable Server-Side AEC"
|
bool "Enable Server-Side AEC (Unstable)"
|
||||||
default n
|
default n
|
||||||
depends on USE_AUDIO_PROCESSOR
|
depends on USE_AUDIO_PROCESSOR
|
||||||
help
|
help
|
||||||
|
|||||||
@ -14,7 +14,15 @@
|
|||||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||||
#include "afe_audio_processor.h"
|
#include "afe_audio_processor.h"
|
||||||
#else
|
#else
|
||||||
#include "dummy_audio_processor.h"
|
#include "no_audio_processor.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_USE_AFE_WAKE_WORD
|
||||||
|
#include "afe_wake_word.h"
|
||||||
|
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||||
|
#include "esp_wake_word.h"
|
||||||
|
#else
|
||||||
|
#include "no_wake_word.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -55,7 +63,15 @@ Application::Application() {
|
|||||||
#if CONFIG_USE_AUDIO_PROCESSOR
|
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||||
audio_processor_ = std::make_unique<AfeAudioProcessor>();
|
audio_processor_ = std::make_unique<AfeAudioProcessor>();
|
||||||
#else
|
#else
|
||||||
audio_processor_ = std::make_unique<DummyAudioProcessor>();
|
audio_processor_ = std::make_unique<NoAudioProcessor>();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_USE_AFE_WAKE_WORD
|
||||||
|
wake_word_ = std::make_unique<AfeWakeWord>();
|
||||||
|
#elif CONFIG_USE_ESP_WAKE_WORD
|
||||||
|
wake_word_ = std::make_unique<EspWakeWord>();
|
||||||
|
#else
|
||||||
|
wake_word_ = std::make_unique<NoWakeWord>();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
esp_timer_create_args_t clock_timer_args = {
|
esp_timer_create_args_t clock_timer_args = {
|
||||||
@ -129,9 +145,7 @@ void Application::CheckNewVersion() {
|
|||||||
|
|
||||||
auto& board = Board::GetInstance();
|
auto& board = Board::GetInstance();
|
||||||
board.SetPowerSaveMode(false);
|
board.SetPowerSaveMode(false);
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
wake_word_->StopDetection();
|
||||||
wake_word_detect_.StopDetection();
|
|
||||||
#endif
|
|
||||||
// 预先关闭音频输出,避免升级过程有音频操作
|
// 预先关闭音频输出,避免升级过程有音频操作
|
||||||
auto codec = board.GetAudioCodec();
|
auto codec = board.GetAudioCodec();
|
||||||
codec->EnableInput(false);
|
codec->EnableInput(false);
|
||||||
@ -256,8 +270,6 @@ void Application::PlaySound(const std::string_view& sound) {
|
|||||||
}
|
}
|
||||||
background_task_->WaitForCompletion();
|
background_task_->WaitForCompletion();
|
||||||
|
|
||||||
// The assets are encoded at 16000Hz, 60ms frame duration
|
|
||||||
SetDecodeSampleRate(16000, 60);
|
|
||||||
const char* data = sound.data();
|
const char* data = sound.data();
|
||||||
size_t size = sound.size();
|
size_t size = sound.size();
|
||||||
for (const char* p = data; p < data + size; ) {
|
for (const char* p = data; p < data + size; ) {
|
||||||
@ -266,6 +278,8 @@ void Application::PlaySound(const std::string_view& sound) {
|
|||||||
|
|
||||||
auto payload_size = ntohs(p3->payload_size);
|
auto payload_size = ntohs(p3->payload_size);
|
||||||
AudioStreamPacket packet;
|
AudioStreamPacket packet;
|
||||||
|
packet.sample_rate = 16000;
|
||||||
|
packet.frame_duration = 60;
|
||||||
packet.payload.resize(payload_size);
|
packet.payload.resize(payload_size);
|
||||||
memcpy(packet.payload.data(), p3->payload, payload_size);
|
memcpy(packet.payload.data(), p3->payload, payload_size);
|
||||||
p += payload_size;
|
p += payload_size;
|
||||||
@ -432,7 +446,7 @@ void Application::Start() {
|
|||||||
});
|
});
|
||||||
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
||||||
std::lock_guard<std::mutex> lock(mutex_);
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
if (device_state_ == kDeviceStateSpeaking && audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||||
audio_decode_queue_.emplace_back(std::move(packet));
|
audio_decode_queue_.emplace_back(std::move(packet));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -442,7 +456,6 @@ void Application::Start() {
|
|||||||
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
|
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
|
||||||
protocol_->server_sample_rate(), codec->output_sample_rate());
|
protocol_->server_sample_rate(), codec->output_sample_rate());
|
||||||
}
|
}
|
||||||
SetDecodeSampleRate(protocol_->server_sample_rate(), protocol_->server_frame_duration());
|
|
||||||
|
|
||||||
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
#if CONFIG_IOT_PROTOCOL_XIAOZHI
|
||||||
auto& thing_manager = iot::ThingManager::GetInstance();
|
auto& thing_manager = iot::ThingManager::GetInstance();
|
||||||
@ -600,28 +613,40 @@ void Application::Start() {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
wake_word_->Initialize(codec);
|
||||||
wake_word_detect_.Initialize(codec);
|
wake_word_->OnWakeWordDetected([this](const std::string& wake_word) {
|
||||||
#ifdef CONFIG_USE_WAKE_WORD_DETECT
|
|
||||||
wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) {
|
|
||||||
Schedule([this, &wake_word]() {
|
Schedule([this, &wake_word]() {
|
||||||
if (device_state_ == kDeviceStateIdle) {
|
if (!protocol_) {
|
||||||
SetDeviceState(kDeviceStateConnecting);
|
|
||||||
wake_word_detect_.EncodeWakeWordData();
|
|
||||||
|
|
||||||
if (!protocol_ || !protocol_->OpenAudioChannel()) {
|
|
||||||
wake_word_detect_.StartDetection();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (device_state_ == kDeviceStateIdle) {
|
||||||
|
wake_word_->EncodeWakeWordData();
|
||||||
|
|
||||||
|
if (!protocol_->IsAudioChannelOpened()) {
|
||||||
|
SetDeviceState(kDeviceStateConnecting);
|
||||||
|
if (!protocol_->OpenAudioChannel()) {
|
||||||
|
wake_word_->StartDetection();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
||||||
|
#if CONFIG_USE_AFE_WAKE_WORD
|
||||||
AudioStreamPacket packet;
|
AudioStreamPacket packet;
|
||||||
// Encode and send the wake word data to the server
|
// Encode and send the wake word data to the server
|
||||||
while (wake_word_detect_.GetWakeWordOpus(packet.payload)) {
|
while (wake_word_->GetWakeWordOpus(packet.payload)) {
|
||||||
protocol_->SendAudio(packet);
|
protocol_->SendAudio(packet);
|
||||||
}
|
}
|
||||||
// Set the chat state to wake word detected
|
// Set the chat state to wake word detected
|
||||||
protocol_->SendWakeWordDetected(wake_word);
|
protocol_->SendWakeWordDetected(wake_word);
|
||||||
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
|
#else
|
||||||
|
// Play the pop up sound to indicate the wake word is detected
|
||||||
|
// And wait 60ms to make sure the queue has been processed by audio task
|
||||||
|
ResetDecoder();
|
||||||
|
PlaySound(Lang::Sounds::P3_POPUP);
|
||||||
|
vTaskDelay(pdMS_TO_TICKS(60));
|
||||||
|
#endif
|
||||||
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
|
||||||
} else if (device_state_ == kDeviceStateSpeaking) {
|
} else if (device_state_ == kDeviceStateSpeaking) {
|
||||||
AbortSpeaking(kAbortReasonWakeWordDetected);
|
AbortSpeaking(kAbortReasonWakeWordDetected);
|
||||||
@ -630,9 +655,7 @@ void Application::Start() {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
#endif
|
wake_word_->StartDetection();
|
||||||
wake_word_detect_.StartDetection();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Wait for the new version check to finish
|
// Wait for the new version check to finish
|
||||||
xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
|
xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
|
||||||
@ -751,17 +774,14 @@ void Application::OnAudioOutput() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_state_ == kDeviceStateListening) {
|
|
||||||
audio_decode_queue_.clear();
|
|
||||||
audio_decode_cv_.notify_all();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto packet = std::move(audio_decode_queue_.front());
|
auto packet = std::move(audio_decode_queue_.front());
|
||||||
audio_decode_queue_.pop_front();
|
audio_decode_queue_.pop_front();
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
audio_decode_cv_.notify_all();
|
audio_decode_cv_.notify_all();
|
||||||
|
|
||||||
|
// Synchronize the sample rate and frame duration
|
||||||
|
SetDecodeSampleRate(packet.sample_rate, packet.frame_duration);
|
||||||
|
|
||||||
busy_decoding_audio_ = true;
|
busy_decoding_audio_ = true;
|
||||||
background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable {
|
background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable {
|
||||||
busy_decoding_audio_ = false;
|
busy_decoding_audio_ = false;
|
||||||
@ -784,43 +804,46 @@ void Application::OnAudioOutput() {
|
|||||||
#ifdef CONFIG_USE_SERVER_AEC
|
#ifdef CONFIG_USE_SERVER_AEC
|
||||||
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
std::lock_guard<std::mutex> lock(timestamp_mutex_);
|
||||||
timestamp_queue_.push_back(packet.timestamp);
|
timestamp_queue_.push_back(packet.timestamp);
|
||||||
last_output_timestamp_ = packet.timestamp;
|
|
||||||
#endif
|
#endif
|
||||||
last_output_time_ = std::chrono::steady_clock::now();
|
last_output_time_ = std::chrono::steady_clock::now();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::OnAudioInput() {
|
void Application::OnAudioInput() {
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
if (wake_word_->IsDetectionRunning()) {
|
||||||
if (wake_word_detect_.IsDetectionRunning()) {
|
|
||||||
std::vector<int16_t> data;
|
std::vector<int16_t> data;
|
||||||
int samples = wake_word_detect_.GetFeedSize();
|
int samples = wake_word_->GetFeedSize();
|
||||||
if (samples > 0) {
|
if (samples > 0) {
|
||||||
ReadAudio(data, 16000, samples);
|
if (ReadAudio(data, 16000, samples)) {
|
||||||
wake_word_detect_.Feed(data);
|
wake_word_->Feed(data);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
}
|
||||||
if (audio_processor_->IsRunning()) {
|
if (audio_processor_->IsRunning()) {
|
||||||
std::vector<int16_t> data;
|
std::vector<int16_t> data;
|
||||||
int samples = audio_processor_->GetFeedSize();
|
int samples = audio_processor_->GetFeedSize();
|
||||||
if (samples > 0) {
|
if (samples > 0) {
|
||||||
ReadAudio(data, 16000, samples);
|
if (ReadAudio(data, 16000, samples)) {
|
||||||
audio_processor_->Feed(data);
|
audio_processor_->Feed(data);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2));
|
vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
|
bool Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
|
||||||
auto codec = Board::GetInstance().GetAudioCodec();
|
auto codec = Board::GetInstance().GetAudioCodec();
|
||||||
|
if (!codec->input_enabled()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (codec->input_sample_rate() != sample_rate) {
|
if (codec->input_sample_rate() != sample_rate) {
|
||||||
data.resize(samples * codec->input_sample_rate() / sample_rate);
|
data.resize(samples * codec->input_sample_rate() / sample_rate);
|
||||||
if (!codec->InputData(data)) {
|
if (!codec->InputData(data)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
if (codec->input_channels() == 2) {
|
if (codec->input_channels() == 2) {
|
||||||
auto mic_channel = std::vector<int16_t>(data.size() / 2);
|
auto mic_channel = std::vector<int16_t>(data.size() / 2);
|
||||||
@ -846,9 +869,10 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
|
|||||||
} else {
|
} else {
|
||||||
data.resize(samples);
|
data.resize(samples);
|
||||||
if (!codec->InputData(data)) {
|
if (!codec->InputData(data)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Application::AbortSpeaking(AbortReason reason) {
|
void Application::AbortSpeaking(AbortReason reason) {
|
||||||
@ -884,17 +908,13 @@ void Application::SetDeviceState(DeviceState state) {
|
|||||||
display->SetStatus(Lang::Strings::STANDBY);
|
display->SetStatus(Lang::Strings::STANDBY);
|
||||||
display->SetEmotion("neutral");
|
display->SetEmotion("neutral");
|
||||||
audio_processor_->Stop();
|
audio_processor_->Stop();
|
||||||
|
wake_word_->StartDetection();
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
|
||||||
wake_word_detect_.StartDetection();
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case kDeviceStateConnecting:
|
case kDeviceStateConnecting:
|
||||||
display->SetStatus(Lang::Strings::CONNECTING);
|
display->SetStatus(Lang::Strings::CONNECTING);
|
||||||
display->SetEmotion("neutral");
|
display->SetEmotion("neutral");
|
||||||
display->SetChatMessage("system", "");
|
display->SetChatMessage("system", "");
|
||||||
timestamp_queue_.clear();
|
timestamp_queue_.clear();
|
||||||
last_output_timestamp_ = 0;
|
|
||||||
break;
|
break;
|
||||||
case kDeviceStateListening:
|
case kDeviceStateListening:
|
||||||
display->SetStatus(Lang::Strings::LISTENING);
|
display->SetStatus(Lang::Strings::LISTENING);
|
||||||
@ -909,14 +929,14 @@ void Application::SetDeviceState(DeviceState state) {
|
|||||||
// Send the start listening command
|
// Send the start listening command
|
||||||
protocol_->SendStartListening(listening_mode_);
|
protocol_->SendStartListening(listening_mode_);
|
||||||
if (previous_state == kDeviceStateSpeaking) {
|
if (previous_state == kDeviceStateSpeaking) {
|
||||||
|
audio_decode_queue_.clear();
|
||||||
|
audio_decode_cv_.notify_all();
|
||||||
// FIXME: Wait for the speaker to empty the buffer
|
// FIXME: Wait for the speaker to empty the buffer
|
||||||
vTaskDelay(pdMS_TO_TICKS(120));
|
vTaskDelay(pdMS_TO_TICKS(120));
|
||||||
}
|
}
|
||||||
opus_encoder_->ResetState();
|
opus_encoder_->ResetState();
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
|
||||||
wake_word_detect_.StopDetection();
|
|
||||||
#endif
|
|
||||||
audio_processor_->Start();
|
audio_processor_->Start();
|
||||||
|
wake_word_->StopDetection();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case kDeviceStateSpeaking:
|
case kDeviceStateSpeaking:
|
||||||
@ -924,8 +944,11 @@ void Application::SetDeviceState(DeviceState state) {
|
|||||||
|
|
||||||
if (listening_mode_ != kListeningModeRealtime) {
|
if (listening_mode_ != kListeningModeRealtime) {
|
||||||
audio_processor_->Stop();
|
audio_processor_->Stop();
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
// Only AFE wake word can be detected in speaking mode
|
||||||
wake_word_detect_.StartDetection();
|
#if CONFIG_USE_AFE_WAKE_WORD
|
||||||
|
wake_word_->StartDetection();
|
||||||
|
#else
|
||||||
|
wake_word_->StopDetection();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
ResetDecoder();
|
ResetDecoder();
|
||||||
|
|||||||
@ -21,12 +21,7 @@
|
|||||||
#include "ota.h"
|
#include "ota.h"
|
||||||
#include "background_task.h"
|
#include "background_task.h"
|
||||||
#include "audio_processor.h"
|
#include "audio_processor.h"
|
||||||
|
#include "wake_word.h"
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT
|
|
||||||
#include "wake_word_detect.h"
|
|
||||||
#elif CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
|
||||||
#include "wake_word_no_afe.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define SCHEDULE_EVENT (1 << 0)
|
#define SCHEDULE_EVENT (1 << 0)
|
||||||
#define SEND_AUDIO_EVENT (1 << 1)
|
#define SEND_AUDIO_EVENT (1 << 1)
|
||||||
@ -83,14 +78,13 @@ public:
|
|||||||
void SendMcpMessage(const std::string& payload);
|
void SendMcpMessage(const std::string& payload);
|
||||||
void SetAecMode(AecMode mode);
|
void SetAecMode(AecMode mode);
|
||||||
AecMode GetAecMode() const { return aec_mode_; }
|
AecMode GetAecMode() const { return aec_mode_; }
|
||||||
|
BackgroundTask* GetBackgroundTask() const { return background_task_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Application();
|
Application();
|
||||||
~Application();
|
~Application();
|
||||||
|
|
||||||
#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
|
std::unique_ptr<WakeWord> wake_word_;
|
||||||
WakeWordDetect wake_word_detect_;
|
|
||||||
#endif
|
|
||||||
std::unique_ptr<AudioProcessor> audio_processor_;
|
std::unique_ptr<AudioProcessor> audio_processor_;
|
||||||
Ota ota_;
|
Ota ota_;
|
||||||
std::mutex mutex_;
|
std::mutex mutex_;
|
||||||
@ -119,7 +113,6 @@ private:
|
|||||||
// 新增:用于维护音频包的timestamp队列
|
// 新增:用于维护音频包的timestamp队列
|
||||||
std::list<uint32_t> timestamp_queue_;
|
std::list<uint32_t> timestamp_queue_;
|
||||||
std::mutex timestamp_mutex_;
|
std::mutex timestamp_mutex_;
|
||||||
std::atomic<uint32_t> last_output_timestamp_ = 0;
|
|
||||||
|
|
||||||
std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
|
std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
|
||||||
std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
|
std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
|
||||||
@ -131,7 +124,7 @@ private:
|
|||||||
void MainEventLoop();
|
void MainEventLoop();
|
||||||
void OnAudioInput();
|
void OnAudioInput();
|
||||||
void OnAudioOutput();
|
void OnAudioOutput();
|
||||||
void ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
|
bool ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
|
||||||
void ResetDecoder();
|
void ResetDecoder();
|
||||||
void SetDecodeSampleRate(int sample_rate, int frame_duration);
|
void SetDecodeSampleRate(int sample_rate, int frame_duration);
|
||||||
void CheckNewVersion();
|
void CheckNewVersion();
|
||||||
|
|||||||
BIN
main/assets/common/popup.p3
Normal file
BIN
main/assets/common/popup.p3
Normal file
Binary file not shown.
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#define PROCESSOR_RUNNING 0x01
|
#define PROCESSOR_RUNNING 0x01
|
||||||
|
|
||||||
static const char* TAG = "AfeAudioProcessor";
|
#define TAG "AfeAudioProcessor"
|
||||||
|
|
||||||
AfeAudioProcessor::AfeAudioProcessor()
|
AfeAudioProcessor::AfeAudioProcessor()
|
||||||
: afe_data_(nullptr) {
|
: afe_data_(nullptr) {
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#include "wake_word_detect.h"
|
#include "afe_wake_word.h"
|
||||||
#include "application.h"
|
#include "application.h"
|
||||||
|
|
||||||
#include <esp_log.h>
|
#include <esp_log.h>
|
||||||
@ -8,9 +8,9 @@
|
|||||||
|
|
||||||
#define DETECTION_RUNNING_EVENT 1
|
#define DETECTION_RUNNING_EVENT 1
|
||||||
|
|
||||||
static const char* TAG = "WakeWordDetect";
|
#define TAG "AfeWakeWord"
|
||||||
|
|
||||||
WakeWordDetect::WakeWordDetect()
|
AfeWakeWord::AfeWakeWord()
|
||||||
: afe_data_(nullptr),
|
: afe_data_(nullptr),
|
||||||
wake_word_pcm_(),
|
wake_word_pcm_(),
|
||||||
wake_word_opus_() {
|
wake_word_opus_() {
|
||||||
@ -18,7 +18,7 @@ WakeWordDetect::WakeWordDetect()
|
|||||||
event_group_ = xEventGroupCreate();
|
event_group_ = xEventGroupCreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
WakeWordDetect::~WakeWordDetect() {
|
AfeWakeWord::~AfeWakeWord() {
|
||||||
if (afe_data_ != nullptr) {
|
if (afe_data_ != nullptr) {
|
||||||
afe_iface_->destroy(afe_data_);
|
afe_iface_->destroy(afe_data_);
|
||||||
}
|
}
|
||||||
@ -30,7 +30,7 @@ WakeWordDetect::~WakeWordDetect() {
|
|||||||
vEventGroupDelete(event_group_);
|
vEventGroupDelete(event_group_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::Initialize(AudioCodec* codec) {
|
void AfeWakeWord::Initialize(AudioCodec* codec) {
|
||||||
codec_ = codec;
|
codec_ = codec;
|
||||||
int ref_num = codec_->input_reference() ? 1 : 0;
|
int ref_num = codec_->input_reference() ? 1 : 0;
|
||||||
|
|
||||||
@ -67,46 +67,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
|
|||||||
afe_data_ = afe_iface_->create_from_config(afe_config);
|
afe_data_ = afe_iface_->create_from_config(afe_config);
|
||||||
|
|
||||||
xTaskCreate([](void* arg) {
|
xTaskCreate([](void* arg) {
|
||||||
auto this_ = (WakeWordDetect*)arg;
|
auto this_ = (AfeWakeWord*)arg;
|
||||||
this_->AudioDetectionTask();
|
this_->AudioDetectionTask();
|
||||||
vTaskDelete(NULL);
|
vTaskDelete(NULL);
|
||||||
}, "audio_detection", 4096, this, 3, nullptr);
|
}, "audio_detection", 4096, this, 3, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
|
void AfeWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
|
||||||
wake_word_detected_callback_ = callback;
|
wake_word_detected_callback_ = callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::StartDetection() {
|
void AfeWakeWord::StartDetection() {
|
||||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
|
xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::StopDetection() {
|
void AfeWakeWord::StopDetection() {
|
||||||
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||||
if (afe_data_ != nullptr) {
|
if (afe_data_ != nullptr) {
|
||||||
afe_iface_->reset_buffer(afe_data_);
|
afe_iface_->reset_buffer(afe_data_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WakeWordDetect::IsDetectionRunning() {
|
bool AfeWakeWord::IsDetectionRunning() {
|
||||||
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
|
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||||
if (afe_data_ == nullptr) {
|
if (afe_data_ == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
afe_iface_->feed(afe_data_, data.data());
|
afe_iface_->feed(afe_data_, data.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t WakeWordDetect::GetFeedSize() {
|
size_t AfeWakeWord::GetFeedSize() {
|
||||||
if (afe_data_ == nullptr) {
|
if (afe_data_ == nullptr) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::AudioDetectionTask() {
|
void AfeWakeWord::AudioDetectionTask() {
|
||||||
auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
|
auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
|
||||||
auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
|
auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
|
||||||
ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
|
ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
|
||||||
@ -121,7 +121,7 @@ void WakeWordDetect::AudioDetectionTask() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Store the wake word data for voice recognition, like who is speaking
|
// Store the wake word data for voice recognition, like who is speaking
|
||||||
StoreWakeWordData((uint16_t*)res->data, res->data_size / sizeof(uint16_t));
|
StoreWakeWordData(res->data, res->data_size / sizeof(int16_t));
|
||||||
|
|
||||||
if (res->wakeup_state == WAKENET_DETECTED) {
|
if (res->wakeup_state == WAKENET_DETECTED) {
|
||||||
StopDetection();
|
StopDetection();
|
||||||
@ -134,7 +134,7 @@ void WakeWordDetect::AudioDetectionTask() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
void AfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) {
|
||||||
// store audio data to wake_word_pcm_
|
// store audio data to wake_word_pcm_
|
||||||
wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
|
wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
|
||||||
// keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
|
// keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
|
||||||
@ -143,13 +143,13 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::EncodeWakeWordData() {
|
void AfeWakeWord::EncodeWakeWordData() {
|
||||||
wake_word_opus_.clear();
|
wake_word_opus_.clear();
|
||||||
if (wake_word_encode_task_stack_ == nullptr) {
|
if (wake_word_encode_task_stack_ == nullptr) {
|
||||||
wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
|
wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
|
||||||
}
|
}
|
||||||
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
||||||
auto this_ = (WakeWordDetect*)arg;
|
auto this_ = (AfeWakeWord*)arg;
|
||||||
{
|
{
|
||||||
auto start_time = esp_timer_get_time();
|
auto start_time = esp_timer_get_time();
|
||||||
auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
|
auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
|
||||||
@ -176,7 +176,7 @@ void WakeWordDetect::EncodeWakeWordData() {
|
|||||||
}, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
}, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WakeWordDetect::GetWakeWordOpus(std::vector<uint8_t>& opus) {
|
bool AfeWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
|
||||||
std::unique_lock<std::mutex> lock(wake_word_mutex_);
|
std::unique_lock<std::mutex> lock(wake_word_mutex_);
|
||||||
wake_word_cv_.wait(lock, [this]() {
|
wake_word_cv_.wait(lock, [this]() {
|
||||||
return !wake_word_opus_.empty();
|
return !wake_word_opus_.empty();
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef WAKE_WORD_DETECT_H
|
#ifndef AFE_WAKE_WORD_H
|
||||||
#define WAKE_WORD_DETECT_H
|
#define AFE_WAKE_WORD_H
|
||||||
|
|
||||||
#include <freertos/FreeRTOS.h>
|
#include <freertos/FreeRTOS.h>
|
||||||
#include <freertos/task.h>
|
#include <freertos/task.h>
|
||||||
@ -16,11 +16,12 @@
|
|||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
|
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
|
#include "wake_word.h"
|
||||||
|
|
||||||
class WakeWordDetect {
|
class AfeWakeWord : public WakeWord {
|
||||||
public:
|
public:
|
||||||
WakeWordDetect();
|
AfeWakeWord();
|
||||||
~WakeWordDetect();
|
~AfeWakeWord();
|
||||||
|
|
||||||
void Initialize(AudioCodec* codec);
|
void Initialize(AudioCodec* codec);
|
||||||
void Feed(const std::vector<int16_t>& data);
|
void Feed(const std::vector<int16_t>& data);
|
||||||
@ -51,7 +52,7 @@ private:
|
|||||||
std::mutex wake_word_mutex_;
|
std::mutex wake_word_mutex_;
|
||||||
std::condition_variable wake_word_cv_;
|
std::condition_variable wake_word_cv_;
|
||||||
|
|
||||||
void StoreWakeWordData(uint16_t* data, size_t size);
|
void StoreWakeWordData(const int16_t* data, size_t size);
|
||||||
void AudioDetectionTask();
|
void AudioDetectionTask();
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1,4 +1,4 @@
|
|||||||
#include "wake_word_no_afe.h"
|
#include "esp_wake_word.h"
|
||||||
#include "application.h"
|
#include "application.h"
|
||||||
|
|
||||||
#include <esp_log.h>
|
#include <esp_log.h>
|
||||||
@ -8,13 +8,13 @@
|
|||||||
|
|
||||||
#define DETECTION_RUNNING_EVENT 1
|
#define DETECTION_RUNNING_EVENT 1
|
||||||
|
|
||||||
static const char* TAG = "WakeWordDetect";
|
#define TAG "EspWakeWord"
|
||||||
|
|
||||||
WakeWordDetect::WakeWordDetect() {
|
EspWakeWord::EspWakeWord() {
|
||||||
event_group_ = xEventGroupCreate();
|
event_group_ = xEventGroupCreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
WakeWordDetect::~WakeWordDetect() {
|
EspWakeWord::~EspWakeWord() {
|
||||||
if (wakenet_data_ != nullptr) {
|
if (wakenet_data_ != nullptr) {
|
||||||
wakenet_iface_->destroy(wakenet_data_);
|
wakenet_iface_->destroy(wakenet_data_);
|
||||||
esp_srmodel_deinit(wakenet_model_);
|
esp_srmodel_deinit(wakenet_model_);
|
||||||
@ -23,13 +23,16 @@ WakeWordDetect::~WakeWordDetect() {
|
|||||||
vEventGroupDelete(event_group_);
|
vEventGroupDelete(event_group_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::Initialize(AudioCodec* codec) {
|
void EspWakeWord::Initialize(AudioCodec* codec) {
|
||||||
codec_ = codec;
|
codec_ = codec;
|
||||||
|
|
||||||
wakenet_model_ = esp_srmodel_init("model");
|
wakenet_model_ = esp_srmodel_init("model");
|
||||||
|
|
||||||
if(wakenet_model_->num > 1) {
|
if(wakenet_model_->num > 1) {
|
||||||
ESP_LOGW(TAG, "More than one model found, using the first one");
|
ESP_LOGW(TAG, "More than one model found, using the first one");
|
||||||
|
} else if (wakenet_model_->num == 0) {
|
||||||
|
ESP_LOGE(TAG, "No model found");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
char *model_name = wakenet_model_->model_name[0];
|
char *model_name = wakenet_model_->model_name[0];
|
||||||
wakenet_iface_ = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
|
wakenet_iface_ = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
|
||||||
@ -40,28 +43,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
|
|||||||
ESP_LOGI(TAG, "Wake word(%s),freq: %d, chunksize: %d", model_name, frequency, audio_chunksize);
|
ESP_LOGI(TAG, "Wake word(%s),freq: %d, chunksize: %d", model_name, frequency, audio_chunksize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::StartDetection() {
|
void EspWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
|
||||||
|
wake_word_detected_callback_ = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EspWakeWord::StartDetection() {
|
||||||
|
ESP_LOGI(TAG, "Start wake word detection");
|
||||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
|
xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::StopDetection() {
|
void EspWakeWord::StopDetection() {
|
||||||
|
ESP_LOGI(TAG, "Stop wake word detection");
|
||||||
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WakeWordDetect::IsDetectionRunning() {
|
bool EspWakeWord::IsDetectionRunning() {
|
||||||
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
|
void EspWakeWord::Feed(const std::vector<int16_t>& data) {
|
||||||
int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
|
int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
|
||||||
if (res > 0) {
|
if (res > 0) {
|
||||||
ESP_LOGI(TAG, "Wake word detected");
|
StopDetection();
|
||||||
auto& app = Application::GetInstance();
|
last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
|
||||||
app.ToggleChatState();
|
|
||||||
|
if (wake_word_detected_callback_) {
|
||||||
|
wake_word_detected_callback_(last_detected_wake_word_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t WakeWordDetect::GetFeedSize() {
|
size_t EspWakeWord::GetFeedSize() {
|
||||||
|
if (wakenet_data_ == nullptr) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
return wakenet_iface_->get_samp_chunksize(wakenet_data_) * codec_->input_channels();
|
return wakenet_iface_->get_samp_chunksize(wakenet_data_) * codec_->input_channels();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EspWakeWord::EncodeWakeWordData() {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool EspWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
@ -1,13 +1,13 @@
|
|||||||
#ifndef WAKE_WORD_DETECT_H
|
#ifndef ESP_WAKE_WORD_H
|
||||||
#define WAKE_WORD_DETECT_H
|
#define ESP_WAKE_WORD_H
|
||||||
|
|
||||||
#include <freertos/FreeRTOS.h>
|
#include <freertos/FreeRTOS.h>
|
||||||
#include <freertos/task.h>
|
#include <freertos/task.h>
|
||||||
#include <freertos/event_groups.h>
|
#include <freertos/event_groups.h>
|
||||||
|
|
||||||
#include "model_path.h"
|
#include <esp_wn_iface.h>
|
||||||
#include "esp_wn_iface.h"
|
#include <esp_wn_models.h>
|
||||||
#include "esp_wn_models.h"
|
#include <model_path.h>
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -17,19 +17,23 @@
|
|||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
|
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
#include <model_path.h>
|
#include "wake_word.h"
|
||||||
|
|
||||||
class WakeWordDetect {
|
class EspWakeWord : public WakeWord {
|
||||||
public:
|
public:
|
||||||
WakeWordDetect();
|
EspWakeWord();
|
||||||
~WakeWordDetect();
|
~EspWakeWord();
|
||||||
|
|
||||||
void Initialize(AudioCodec* codec);
|
void Initialize(AudioCodec* codec);
|
||||||
void Feed(const std::vector<int16_t>& data);
|
void Feed(const std::vector<int16_t>& data);
|
||||||
|
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
|
||||||
void StartDetection();
|
void StartDetection();
|
||||||
void StopDetection();
|
void StopDetection();
|
||||||
bool IsDetectionRunning();
|
bool IsDetectionRunning();
|
||||||
size_t GetFeedSize();
|
size_t GetFeedSize();
|
||||||
|
void EncodeWakeWordData();
|
||||||
|
bool GetWakeWordOpus(std::vector<uint8_t>& opus);
|
||||||
|
const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
esp_wn_iface_t *wakenet_iface_ = nullptr;
|
esp_wn_iface_t *wakenet_iface_ = nullptr;
|
||||||
@ -37,6 +41,9 @@ private:
|
|||||||
srmodel_list_t *wakenet_model_ = nullptr;
|
srmodel_list_t *wakenet_model_ = nullptr;
|
||||||
EventGroupHandle_t event_group_;
|
EventGroupHandle_t event_group_;
|
||||||
AudioCodec* codec_ = nullptr;
|
AudioCodec* codec_ = nullptr;
|
||||||
|
|
||||||
|
std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
|
||||||
|
std::string last_detected_wake_word_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -1,13 +1,13 @@
|
|||||||
#include "dummy_audio_processor.h"
|
#include "no_audio_processor.h"
|
||||||
#include <esp_log.h>
|
#include <esp_log.h>
|
||||||
|
|
||||||
#define TAG "DummyAudioProcessor"
|
#define TAG "NoAudioProcessor"
|
||||||
|
|
||||||
void DummyAudioProcessor::Initialize(AudioCodec* codec) {
|
void NoAudioProcessor::Initialize(AudioCodec* codec) {
|
||||||
codec_ = codec;
|
codec_ = codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
void NoAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||||
if (!is_running_ || !output_callback_) {
|
if (!is_running_ || !output_callback_) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -15,27 +15,27 @@ void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
|||||||
output_callback_(std::vector<int16_t>(data));
|
output_callback_(std::vector<int16_t>(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::Start() {
|
void NoAudioProcessor::Start() {
|
||||||
is_running_ = true;
|
is_running_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::Stop() {
|
void NoAudioProcessor::Stop() {
|
||||||
is_running_ = false;
|
is_running_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DummyAudioProcessor::IsRunning() {
|
bool NoAudioProcessor::IsRunning() {
|
||||||
return is_running_;
|
return is_running_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
|
void NoAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
|
||||||
output_callback_ = callback;
|
output_callback_ = callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
|
void NoAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
|
||||||
vad_state_change_callback_ = callback;
|
vad_state_change_callback_ = callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t DummyAudioProcessor::GetFeedSize() {
|
size_t NoAudioProcessor::GetFeedSize() {
|
||||||
if (!codec_) {
|
if (!codec_) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -43,7 +43,7 @@ size_t DummyAudioProcessor::GetFeedSize() {
|
|||||||
return 30 * codec_->input_sample_rate() / 1000;
|
return 30 * codec_->input_sample_rate() / 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DummyAudioProcessor::EnableDeviceAec(bool enable) {
|
void NoAudioProcessor::EnableDeviceAec(bool enable) {
|
||||||
if (enable) {
|
if (enable) {
|
||||||
ESP_LOGE(TAG, "Device AEC is not supported");
|
ESP_LOGE(TAG, "Device AEC is not supported");
|
||||||
}
|
}
|
||||||
@ -7,10 +7,10 @@
|
|||||||
#include "audio_processor.h"
|
#include "audio_processor.h"
|
||||||
#include "audio_codec.h"
|
#include "audio_codec.h"
|
||||||
|
|
||||||
class DummyAudioProcessor : public AudioProcessor {
|
class NoAudioProcessor : public AudioProcessor {
|
||||||
public:
|
public:
|
||||||
DummyAudioProcessor() = default;
|
NoAudioProcessor() = default;
|
||||||
~DummyAudioProcessor() = default;
|
~NoAudioProcessor() = default;
|
||||||
|
|
||||||
void Initialize(AudioCodec* codec) override;
|
void Initialize(AudioCodec* codec) override;
|
||||||
void Feed(const std::vector<int16_t>& data) override;
|
void Feed(const std::vector<int16_t>& data) override;
|
||||||
45
main/audio_processing/no_wake_word.cc
Normal file
45
main/audio_processing/no_wake_word.cc
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#include "no_wake_word.h"
|
||||||
|
#include <esp_log.h>
|
||||||
|
|
||||||
|
#define TAG "NoWakeWord"
|
||||||
|
|
||||||
|
// Remembers the codec passed in by the caller. Nothing else is set up here:
// this implementation performs no wake-word detection.
void NoWakeWord::Initialize(AudioCodec* codec) {
    this->codec_ = codec;
}
|
||||||
|
|
||||||
|
// Accepts audio samples to satisfy the WakeWord interface and discards them.
void NoWakeWord::Feed(const std::vector<int16_t>& data) {
    (void)data;  // intentionally unused: there is no detector to feed
}
|
||||||
|
|
||||||
|
// The callback is not stored: this implementation never detects a wake word,
// so there is nothing that could ever invoke it.
void NoWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
    (void)callback;  // intentionally unused
}
|
||||||
|
|
||||||
|
// No-op: there is no detector to start.
void NoWakeWord::StartDetection() {
}
|
||||||
|
|
||||||
|
// No-op: there is no detector to stop.
void NoWakeWord::StopDetection() {
}
|
||||||
|
|
||||||
|
// Detection can never be running in this implementation, so always false.
bool NoWakeWord::IsDetectionRunning() {
    return false;
}
|
||||||
|
|
||||||
|
// Always reports a feed size of zero: this implementation consumes no audio.
size_t NoWakeWord::GetFeedSize() {
    return size_t{0};
}
|
||||||
|
|
||||||
|
// No-op: no wake-word audio is ever captured, so there is nothing to encode.
void NoWakeWord::EncodeWakeWordData() {
}
|
||||||
|
|
||||||
|
// Empties the caller's buffer and returns false: this implementation never
// produces encoded wake-word audio.
bool NoWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
    opus.clear();  // leave the output buffer empty
    return false;
}
|
||||||
|
|
||||||
|
/// Returns the most recently detected wake word.
///
/// This implementation never detects anything, so the result is always an
/// empty string.
///
/// @return Reference to an empty string that remains valid after the call.
const std::string& NoWakeWord::GetLastDetectedWakeWord() const {
    // The function returns by reference, so the referenced object must outlive
    // the call. `return "";` would bind the reference to a temporary
    // std::string that is destroyed at the end of the return statement,
    // handing every caller a dangling reference. A function-local static
    // lives for the rest of the program.
    static const std::string kNoWakeWord;
    return kNoWakeWord;
}
|
||||||
31
main/audio_processing/no_wake_word.h
Normal file
31
main/audio_processing/no_wake_word.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#ifndef NO_WAKE_WORD_H
|
||||||
|
#define NO_WAKE_WORD_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "wake_word.h"
|
||||||
|
#include "audio_codec.h"
|
||||||
|
|
||||||
|
class NoWakeWord : public WakeWord {
|
||||||
|
public:
|
||||||
|
NoWakeWord() = default;
|
||||||
|
~NoWakeWord() = default;
|
||||||
|
|
||||||
|
void Initialize(AudioCodec* codec) override;
|
||||||
|
void Feed(const std::vector<int16_t>& data) override;
|
||||||
|
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) override;
|
||||||
|
void StartDetection() override;
|
||||||
|
void StopDetection() override;
|
||||||
|
bool IsDetectionRunning() override;
|
||||||
|
size_t GetFeedSize() override;
|
||||||
|
void EncodeWakeWordData() override;
|
||||||
|
bool GetWakeWordOpus(std::vector<uint8_t>& opus) override;
|
||||||
|
const std::string& GetLastDetectedWakeWord() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
AudioCodec* codec_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
26
main/audio_processing/wake_word.h
Normal file
26
main/audio_processing/wake_word.h
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#ifndef WAKE_WORD_H
|
||||||
|
#define WAKE_WORD_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include "audio_codec.h"
|
||||||
|
|
||||||
|
class WakeWord {
|
||||||
|
public:
|
||||||
|
virtual ~WakeWord() = default;
|
||||||
|
|
||||||
|
virtual void Initialize(AudioCodec* codec) = 0;
|
||||||
|
virtual void Feed(const std::vector<int16_t>& data) = 0;
|
||||||
|
virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;
|
||||||
|
virtual void StartDetection() = 0;
|
||||||
|
virtual void StopDetection() = 0;
|
||||||
|
virtual bool IsDetectionRunning() = 0;
|
||||||
|
virtual size_t GetFeedSize() = 0;
|
||||||
|
virtual void EncodeWakeWordData() = 0;
|
||||||
|
virtual bool GetWakeWordOpus(std::vector<uint8_t>& opus) = 0;
|
||||||
|
virtual const std::string& GetLastDetectedWakeWord() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -30,7 +30,8 @@
|
|||||||
"CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y",
|
"CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y",
|
||||||
"CONFIG_NEWLIB_NANO_FORMAT=y",
|
"CONFIG_NEWLIB_NANO_FORMAT=y",
|
||||||
"CONFIG_MMAP_FILE_NAME_LENGTH=25",
|
"CONFIG_MMAP_FILE_NAME_LENGTH=25",
|
||||||
"CONFIG_ESP_CONSOLE_NONE=y"
|
"CONFIG_ESP_CONSOLE_NONE=y",
|
||||||
|
"CONFIG_IOT_PROTOCOL_XIAOZHI=y"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -70,7 +70,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void InitializePowerSaveTimer() {
|
void InitializePowerSaveTimer() {
|
||||||
power_save_timer_ = new PowerSaveTimer(160, 60);
|
power_save_timer_ = new PowerSaveTimer(240, 60);
|
||||||
power_save_timer_->OnEnterSleepMode([this]() {
|
power_save_timer_->OnEnterSleepMode([this]() {
|
||||||
ESP_LOGI(TAG, "Enabling sleep mode");
|
ESP_LOGI(TAG, "Enabling sleep mode");
|
||||||
auto display = GetDisplay();
|
auto display = GetDisplay();
|
||||||
|
|||||||
@ -5,7 +5,9 @@
|
|||||||
"name": "lichuang-c3-dev",
|
"name": "lichuang-c3-dev",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y",
|
"CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y",
|
||||||
"CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\""
|
"CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\"",
|
||||||
|
"CONFIG_ESP_WIFI_ENTERPRISE_SUPPORT=n",
|
||||||
|
"CONFIG_LWIP_IPV6=n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -5,7 +5,8 @@
|
|||||||
"name": "magiclick-c3-v2",
|
"name": "magiclick-c3-v2",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_PM_ENABLE=y",
|
"CONFIG_PM_ENABLE=y",
|
||||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||||
|
"CONFIG_USE_ESP_WAKE_WORD=n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -5,7 +5,8 @@
|
|||||||
"name": "magiclick-c3",
|
"name": "magiclick-c3",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_PM_ENABLE=y",
|
"CONFIG_PM_ENABLE=y",
|
||||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||||
|
"CONFIG_USE_ESP_WAKE_WORD=n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -5,7 +5,8 @@
|
|||||||
"name": "xmini-c3",
|
"name": "xmini-c3",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_PM_ENABLE=y",
|
"CONFIG_PM_ENABLE=y",
|
||||||
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
|
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
|
||||||
|
"CONFIG_USE_ESP_WAKE_WORD=y"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -30,10 +30,10 @@ private:
|
|||||||
Display* display_ = nullptr;
|
Display* display_ = nullptr;
|
||||||
Button boot_button_;
|
Button boot_button_;
|
||||||
bool press_to_talk_enabled_ = false;
|
bool press_to_talk_enabled_ = false;
|
||||||
PowerSaveTimer* power_save_timer_;
|
PowerSaveTimer* power_save_timer_ = nullptr;
|
||||||
|
|
||||||
void InitializePowerSaveTimer() {
|
void InitializePowerSaveTimer() {
|
||||||
power_save_timer_ = new PowerSaveTimer(160, 60);
|
power_save_timer_ = new PowerSaveTimer(160, 600);
|
||||||
power_save_timer_->OnEnterSleepMode([this]() {
|
power_save_timer_->OnEnterSleepMode([this]() {
|
||||||
ESP_LOGI(TAG, "Enabling sleep mode");
|
ESP_LOGI(TAG, "Enabling sleep mode");
|
||||||
auto display = GetDisplay();
|
auto display = GetDisplay();
|
||||||
@ -130,7 +130,9 @@ private:
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
boot_button_.OnPressDown([this]() {
|
boot_button_.OnPressDown([this]() {
|
||||||
|
if (power_save_timer_) {
|
||||||
power_save_timer_->WakeUp();
|
power_save_timer_->WakeUp();
|
||||||
|
}
|
||||||
if (press_to_talk_enabled_) {
|
if (press_to_talk_enabled_) {
|
||||||
Application::GetInstance().StartListening();
|
Application::GetInstance().StartListening();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -227,6 +227,8 @@ bool MqttProtocol::OpenAudioChannel() {
|
|||||||
auto nonce = (uint8_t*)data.data();
|
auto nonce = (uint8_t*)data.data();
|
||||||
auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
|
auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
|
||||||
AudioStreamPacket packet;
|
AudioStreamPacket packet;
|
||||||
|
packet.sample_rate = server_sample_rate_;
|
||||||
|
packet.frame_duration = server_frame_duration_;
|
||||||
packet.timestamp = timestamp;
|
packet.timestamp = timestamp;
|
||||||
packet.payload.resize(decrypted_size);
|
packet.payload.resize(decrypted_size);
|
||||||
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet.payload.data());
|
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet.payload.data());
|
||||||
|
|||||||
@ -8,6 +8,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct AudioStreamPacket {
|
struct AudioStreamPacket {
|
||||||
|
int sample_rate = 0;
|
||||||
|
int frame_duration = 0;
|
||||||
uint32_t timestamp = 0;
|
uint32_t timestamp = 0;
|
||||||
std::vector<uint8_t> payload;
|
std::vector<uint8_t> payload;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -124,6 +124,8 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
bp2->payload_size = ntohl(bp2->payload_size);
|
bp2->payload_size = ntohl(bp2->payload_size);
|
||||||
auto payload = (uint8_t*)bp2->payload;
|
auto payload = (uint8_t*)bp2->payload;
|
||||||
on_incoming_audio_(AudioStreamPacket{
|
on_incoming_audio_(AudioStreamPacket{
|
||||||
|
.sample_rate = server_sample_rate_,
|
||||||
|
.frame_duration = server_frame_duration_,
|
||||||
.timestamp = bp2->timestamp,
|
.timestamp = bp2->timestamp,
|
||||||
.payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
|
.payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
|
||||||
});
|
});
|
||||||
@ -133,11 +135,15 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
bp3->payload_size = ntohs(bp3->payload_size);
|
bp3->payload_size = ntohs(bp3->payload_size);
|
||||||
auto payload = (uint8_t*)bp3->payload;
|
auto payload = (uint8_t*)bp3->payload;
|
||||||
on_incoming_audio_(AudioStreamPacket{
|
on_incoming_audio_(AudioStreamPacket{
|
||||||
|
.sample_rate = server_sample_rate_,
|
||||||
|
.frame_duration = server_frame_duration_,
|
||||||
.timestamp = 0,
|
.timestamp = 0,
|
||||||
.payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
|
.payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
on_incoming_audio_(AudioStreamPacket{
|
on_incoming_audio_(AudioStreamPacket{
|
||||||
|
.sample_rate = server_sample_rate_,
|
||||||
|
.frame_duration = server_frame_duration_,
|
||||||
.timestamp = 0,
|
.timestamp = 0,
|
||||||
.payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
|
.payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,2 +1,3 @@
|
|||||||
|
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
|
CONFIG_SR_WN_WN9S_NIHAOXIAOZHI=y
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user