aec: add initial webrtc-audio-processing implementation

- Enables high-pass filter, AEC, AGC, NS.
- Disables system AEC for Windows

Gitlab: #464
Change-Id: Ife6261a815395263abeca482bb78ad2c90133db3
This commit is contained in:
Andreas Traczyk
2021-02-26 10:21:04 -05:00
committed by Adrien Béraud
parent 972ed1932e
commit 854362a8c2
11 changed files with 321 additions and 23 deletions

View File

@ -122,17 +122,6 @@ if(MSVC)
################################################################################
if("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "x64")
target_include_directories(${PROJECT_NAME} PUBLIC
"$<$<CONFIG:ReleaseLib_win32>:"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include/upnp;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/ffmpeg/Build/win32/x64/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/sndfile/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/asio/asio/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/restinio/dev;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/fmt/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/http_parser;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/natpmp"
">"
"${CMAKE_CURRENT_SOURCE_DIR}/.;"
"${CMAKE_CURRENT_SOURCE_DIR}/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/src/client;"
@ -152,6 +141,15 @@ if(MSVC)
"${CMAKE_CURRENT_SOURCE_DIR}/compat/msvc;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include/upnp;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/ffmpeg/Build/win32/x64/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/sndfile/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/asio/asio/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/restinio/dev;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/fmt/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/http_parser;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/natpmp"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/msgpack-c/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/opendht/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/libarchive/libarchive;"
@ -164,6 +162,7 @@ if(MSVC)
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/pjproject/third_party;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/pjproject/pjmedia/include"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/speexdsp/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/webrtc-audio-processing"
)
endif()
@ -190,6 +189,9 @@ if(MSVC)
"NOMINMAX;"
"HAVE_CONFIG_H;"
"WIN32_LEAN_AND_MEAN;"
"WEBRTC_WIN;"
"WEBRTC_AUDIO_PROCESSING_ONLY_BUILD;"
"WEBRTC_NS_FLOAT;"
)
endif()
@ -274,6 +276,7 @@ if(MSVC)
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/libcrypto.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/libssl.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/speexdsp/lib/libspeexdsp.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/webrtc-audio-processing/build/Release/webrtc-audio-processing.lib
/ignore:4006
"
)

View File

@ -13,7 +13,7 @@ systems. This function is required for `alloca.c' support on those systems.
#define HAVE_ALLOCA 1
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
*/
#define HAVE_ALLOCA_H 1
/* Define if you have alsa */
@ -100,6 +100,9 @@ systems. This function is required for `alloca.c' support on those systems.
/* Define if you have libspeexdsp */
#define HAVE_SPEEXDSP 1
/* Define if you have webrtc-audio-processing */
#define HAVE_WEBRTC_AP 1
/* Define to 1 if stdbool.h conforms to C99. */
#define HAVE_STDBOOL_H 1

View File

@ -27,7 +27,13 @@
#include "audio/resampler.h"
#include "tonecontrol.h"
#include "client/ring_signal.h"
// aec
#if HAVE_WEBRTC_AP
#include "echo-cancel/webrtc_echo_canceller.h"
#else
#include "echo-cancel/null_echo_canceller.h"
#endif
#include <ctime>
#include <algorithm>
@ -118,12 +124,32 @@ AudioLayer::setHasNativeAEC(bool hasEAC)
void
AudioLayer::checkAEC()
{
std::lock_guard<std::mutex> lk(ecMutex_);
bool shouldSoftAEC = not hasNativeAEC_ and playbackStarted_ and recordStarted_;
if (not echoCanceller_ and shouldSoftAEC) {
JAMI_WARN("Starting AEC");
echoCanceller_.reset(new NullEchoCanceller(audioFormat_, audioFormat_.sample_rate / 100));
} else if (echoCanceller_ and not shouldSoftAEC) {
auto nb_channels = std::min(audioFormat_.nb_channels, audioInputFormat_.nb_channels);
auto sample_rate = std::min(audioFormat_.sample_rate, audioInputFormat_.sample_rate);
if (sample_rate % 16000u != 0)
sample_rate = 16000u * ((sample_rate / 16000u) + 1u);
sample_rate = std::clamp(sample_rate, 16000u, 96000u);
AudioFormat format {sample_rate, nb_channels};
auto frame_size = sample_rate / 100u;
JAMI_WARN("Input {%d Hz, %d channels}",
audioInputFormat_.sample_rate,
audioInputFormat_.nb_channels);
JAMI_WARN("Output {%d Hz, %d channels}", audioFormat_.sample_rate, audioFormat_.nb_channels);
JAMI_WARN("Starting AEC {%d Hz, %d channels, %d samples/frame}",
sample_rate,
nb_channels,
frame_size);
#if HAVE_WEBRTC_AP
echoCanceller_.reset(new WebRTCEchoCanceller(format, frame_size));
#else
echoCanceller_.reset(new NullEchoCanceller(format, frame_size));
#endif
} else if (echoCanceller_ and not shouldSoftAEC and not playbackStarted_
and not recordStarted_) {
JAMI_WARN("Stopping AEC");
echoCanceller_.reset();
}
@ -209,11 +235,18 @@ AudioLayer::getToPlay(AudioFormat format, size_t writableSamples)
} else if (auto buf = bufferPool.getData(RingBufferPool::DEFAULT_ID)) {
resampled = resampler_->resample(std::move(buf), format);
} else {
if (echoCanceller_) {
auto silence = std::make_shared<AudioFrame>(format, writableSamples);
libav_utils::fillWithSilence(silence->pointer());
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putPlayback(silence);
}
break;
}
if (resampled) {
if (echoCanceller_) {
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putPlayback(resampled);
}
playbackQueue_->enqueue(std::move(resampled));
@ -228,9 +261,11 @@ void
AudioLayer::putRecorded(std::shared_ptr<AudioFrame>&& frame)
{
if (echoCanceller_) {
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putRecorded(std::move(frame));
while (auto rec = echoCanceller_->getProcessed())
while (auto rec = echoCanceller_->getProcessed()) {
mainRingBuffer_->put(std::move(rec));
}
} else {
mainRingBuffer_->put(std::move(frame));
}

View File

@ -289,6 +289,7 @@ protected:
*/
std::unique_ptr<Resampler> resampler_;
std::mutex ecMutex_ {};
std::unique_ptr<EchoCanceller> echoCanceller_;
private:

View File

@ -7,6 +7,8 @@ list (APPEND Source_Files__media__audio__echo_cancel
"${CMAKE_CURRENT_SOURCE_DIR}/null_echo_canceller.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.h"
"${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.h"
"${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.cpp"
)
set (Source_Files__media__audio__echo_cancel ${Source_Files__media__audio__echo_cancel} PARENT_SCOPE)

View File

@ -10,6 +10,11 @@ EC_SRC += speex_echo_canceller.cpp
EC_HDR += speex_echo_canceller.h
endif
if HAVE_WEBRTC_AP
EC_SRC += webrtc_echo_canceller.cpp
EC_HDR += webrtc_echo_canceller.h
endif
libecho_cancel_la_SOURCES = \
$(EC_SRC)

View File

@ -22,9 +22,13 @@
#include "noncopyable.h"
#include "audio/audio_frame_resizer.h"
#include "audio/resampler.h"
#include "audio/audiobuffer.h"
#include "libav_deps.h"
#include <atomic>
#include <memory>
namespace jami {
class EchoCanceller
@ -36,19 +40,26 @@ public:
EchoCanceller(AudioFormat format, unsigned frameSize)
: playbackQueue_(format, frameSize)
, recordQueue_(format, frameSize)
, sampleRate_(format.sample_rate)
, resampler_(new Resampler)
, format_(format)
, frameSize_(frameSize)
{}
virtual ~EchoCanceller() = default;
virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf)
{
recordQueue_.enqueue(std::move(buf));
recordStarted_ = true;
if (!playbackStarted_)
return;
enqueue(recordQueue_, std::move(buf));
};
virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf)
{
auto c = buf;
playbackQueue_.enqueue(std::move(c));
playbackStarted_ = true;
if (!recordStarted_)
return;
auto copy = buf;
enqueue(playbackQueue_, std::move(copy));
};
virtual std::shared_ptr<AudioFrame> getProcessed() = 0;
virtual void done() = 0;
@ -56,8 +67,21 @@ public:
protected:
AudioFrameResizer playbackQueue_;
AudioFrameResizer recordQueue_;
unsigned sampleRate_;
std::unique_ptr<Resampler> resampler_;
std::atomic_bool playbackStarted_;
std::atomic_bool recordStarted_;
AudioFormat format_;
unsigned frameSize_;
private:
void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
{
if (buf->getFormat() != format_) {
auto resampled = resampler_->resample(std::move(buf), format_);
frameResizer.enqueue(std::move(resampled));
} else
frameResizer.enqueue(std::move(buf));
};
};
} // namespace jami

View File

@ -50,7 +50,7 @@ SpeexEchoCanceller::SpeexEchoCanceller(AudioFormat format, unsigned frameSize)
: EchoCanceller(format, frameSize)
, pimpl_(std::make_unique<SpeexEchoStateImpl>(format, frameSize))
{
speex_echo_ctl(pimpl_->state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate_);
speex_echo_ctl(pimpl_->state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
}
void

View File

@ -0,0 +1,170 @@
/*
* Copyright (C) 2021 Savoir-faire Linux Inc.
*
* Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "webrtc_echo_canceller.h"
#include <webrtc/modules/audio_processing/include/audio_processing.h>
namespace jami {
// Private implementation wrapping the webrtc::AudioProcessing module (APM).
// NOTE: this definition must precede the WebRTCEchoCanceller constructor
// below — std::make_unique<WebRTCAPMImpl> requires a complete type at the
// point of use, so defining the struct after the constructor does not compile.
struct WebRTCEchoCanceller::WebRTCAPMImpl
{
    using APMPtr = std::unique_ptr<webrtc::AudioProcessing>;
    APMPtr apm;
    // One stream configuration shared by all four APM streams (same rate
    // and channel count for capture and render).
    webrtc::StreamConfig streamConfig;

    WebRTCAPMImpl(AudioFormat format, unsigned frameSize)
        : streamConfig(format.sample_rate, format.nb_channels)
    {
        webrtc::ProcessingConfig pconfig;
        webrtc::Config config;

        // Extended filter and delay-agnostic AEC modes.
        config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
        config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));

        apm.reset(webrtc::AudioProcessing::Create(config));

        pconfig = {
            streamConfig, /* input stream */
            streamConfig, /* output stream */
            streamConfig, /* reverse input stream */
            streamConfig, /* reverse output stream */
        };

        if (apm->Initialize(pconfig) != webrtc::AudioProcessing::kNoError) {
            JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
        }

        // aec — drift compensation pairs with set_stream_drift_samples()
        // called per frame in getProcessed().
        apm->echo_cancellation()->set_suppression_level(
            webrtc::EchoCancellation::SuppressionLevel::kModerateSuppression);
        apm->echo_cancellation()->enable_drift_compensation(true);
        apm->echo_cancellation()->Enable(true);

        // hpf
        apm->high_pass_filter()->Enable(true);

        // ns
        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
        apm->noise_suppression()->Enable(true);

        // agc — adaptive analog mode; the analog level is exchanged with the
        // APM every frame in getProcessed().
        apm->gain_control()->set_analog_level_limits(0, 255);
        apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
        apm->gain_control()->Enable(true);
    }
};

// Construct the canceller. The base class sets up the record/playback
// resizer queues at `frameSize` samples per frame; the float buffers hold
// one deinterleaved frame per channel for the APM's planar-float API.
WebRTCEchoCanceller::WebRTCEchoCanceller(AudioFormat format, unsigned frameSize)
    : EchoCanceller(format, frameSize)
    , pimpl_(std::make_unique<WebRTCAPMImpl>(format, frameSize))
    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
    , iRecordBuffer_(frameSize_, format)
    , iPlaybackBuffer_(frameSize_, format)
{}
// Feed one captured (near-end) frame into the record queue.
// Simply delegates to the base-class implementation.
void
WebRTCEchoCanceller::putRecorded(std::shared_ptr<AudioFrame>&& buf)
{
    auto frame = std::move(buf);
    EchoCanceller::putRecorded(std::move(frame));
}
// Feed one playback (far-end) frame into the playback queue.
// Simply delegates to the base-class implementation.
void
WebRTCEchoCanceller::putPlayback(const std::shared_ptr<AudioFrame>& buf)
{
    const auto& frame = buf;
    EchoCanceller::putPlayback(frame);
}
// Pull one processed (echo-cancelled) frame, or an empty pointer if there
// is not yet a full frame buffered on both the record and playback queues.
//
// Per-frame pipeline:
//  1. trim each queue down if it has accumulated more than 10 frames,
//     keeping latency bounded
//  2. deinterleave the playback frame to planar float and feed it to the
//     APM render path (ProcessReverseStream)
//  3. deinterleave the record frame to planar float and run the capture
//     path (ProcessStream) — AEC/HPF/NS/AGC as configured in WebRTCAPMImpl
//  4. convert the result back to interleaved s16 and return it
std::shared_ptr<AudioFrame>
WebRTCEchoCanceller::getProcessed()
{
    // Drop stale audio so the queues never grow past ~10 frames.
    while (recordQueue_.samples() > recordQueue_.frameSize() * 10) {
        JAMI_DBG("record overflow %d / %d", recordQueue_.samples(), frameSize_);
        recordQueue_.dequeue();
    }
    while (playbackQueue_.samples() > playbackQueue_.frameSize() * 10) {
        JAMI_DBG("playback overflow %d / %d", playbackQueue_.samples(), frameSize_);
        playbackQueue_.dequeue();
    }
    if (recordQueue_.samples() < recordQueue_.frameSize()
        || playbackQueue_.samples() < playbackQueue_.frameSize()) {
        // If there are not enough samples in either queue, we can't
        // process anything.
        // JAMI_DBG("underrun p:%d / r:%d", playbackQueue_.samples(), recordQueue_.samples());
        return {};
    }

    // Sample-count difference between the two streams, handed to the APM's
    // drift compensation (enabled in WebRTCAPMImpl).
    int driftSamples = playbackQueue_.samples() - recordQueue_.samples();

    auto playback = playbackQueue_.dequeue();
    auto record = recordQueue_.dequeue();
    if (!playback || !record)
        return {};

    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);

    webrtc::StreamConfig& sc = pimpl_->streamConfig;

    // analyze deinterleaved float playback data
    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
                                  frameSize_,
                                  format_.nb_channels);
    std::vector<float*> playData {format_.nb_channels};
    // unsigned loop index: nb_channels is unsigned, avoid a signed/unsigned
    // comparison (the original used `auto c = 0`, i.e. int).
    for (unsigned c = 0; c < format_.nb_channels; ++c) {
        playData[c] = fPlaybackBuffer_[c].data();
        iPlaybackBuffer_.channelToFloat(playData[c], c);
    }
    if (pimpl_->apm->ProcessReverseStream(playData.data(), sc, sc, playData.data())
        != webrtc::AudioProcessing::kNoError)
        JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");

    // process deinterleaved float recorded data
    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
                                frameSize_,
                                format_.nb_channels);
    std::vector<float*> recData {format_.nb_channels};
    for (unsigned c = 0; c < format_.nb_channels; ++c) {
        recData[c] = fRecordBuffer_[c].data();
        iRecordBuffer_.channelToFloat(recData[c], c);
    }

    // TODO: implement this correctly (it MUST be called prior to ProcessStream)
    // delay = (t_render - t_analyze) + (t_process - t_capture)
    pimpl_->apm->set_stream_delay_ms(0);

    // Hand the current analog mic level to the AGC, then read back the
    // level it recommends after processing.
    pimpl_->apm->gain_control()->set_stream_analog_level(analogLevel_);
    pimpl_->apm->echo_cancellation()->set_stream_drift_samples(driftSamples);
    if (pimpl_->apm->ProcessStream(recData.data(), sc, sc, recData.data())
        != webrtc::AudioProcessing::kNoError)
        JAMI_ERR("[webrtc-ap] ProcessStream failed");
    analogLevel_ = pimpl_->apm->gain_control()->stream_analog_level();

    // return interleaved s16 data
    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
                                                frameSize_,
                                                format_.nb_channels);
    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
    return processed;
}
// Intentionally a no-op: there is no per-session teardown here; the APM and
// its resources are released when pimpl_ is destroyed with the canceller.
void
WebRTCEchoCanceller::done()
{}
} // namespace jami

View File

@ -0,0 +1,53 @@
/*
* Copyright (C) 2021 Savoir-faire Linux Inc.
*
* Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#pragma once
#include "audio/echo-cancel/echo_canceller.h"
#include "audio/audio_frame_resizer.h"
#include <memory>
namespace jami {
class WebRTCEchoCanceller final : public EchoCanceller
{
public:
WebRTCEchoCanceller(AudioFormat format, unsigned frameSize);
~WebRTCEchoCanceller() = default;
// Inherited via EchoCanceller
void putRecorded(std::shared_ptr<AudioFrame>&& buf) override;
void putPlayback(const std::shared_ptr<AudioFrame>& buf) override;
std::shared_ptr<AudioFrame> getProcessed() override;
void done() override;
private:
struct WebRTCAPMImpl;
std::unique_ptr<WebRTCAPMImpl> pimpl_;
using fChannelBuffer = std::vector<std::vector<float>>;
fChannelBuffer fRecordBuffer_;
fChannelBuffer fPlaybackBuffer_;
AudioBuffer iRecordBuffer_;
AudioBuffer iPlaybackBuffer_;
int analogLevel_ {0};
};
} // namespace jami

View File

@ -91,6 +91,8 @@ PortAudioLayer::PortAudioLayer(const AudioPreference& pref)
: AudioLayer {pref}
, pimpl_ {new PortAudioLayerImpl(*this, pref)}
{
setHasNativeAEC(false);
auto numDevices = Pa_GetDeviceCount();
if (numDevices < 0) {
JAMI_ERR("Pa_CountDevices returned 0x%x", numDevices);