aec: add initial webrtc-audio-processing implementation

- Enables high-pass filter, AEC, AGC, NS.
- Disables system AEC for Windows

Gitlab: #464
Change-Id: Ife6261a815395263abeca482bb78ad2c90133db3
This commit is contained in:
Andreas Traczyk
2021-02-26 10:21:04 -05:00
committed by Adrien Béraud
parent 972ed1932e
commit 854362a8c2
11 changed files with 321 additions and 23 deletions

View File

@ -122,17 +122,6 @@ if(MSVC)
################################################################################
if("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "x64")
target_include_directories(${PROJECT_NAME} PUBLIC
"$<$<CONFIG:ReleaseLib_win32>:"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include/upnp;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/ffmpeg/Build/win32/x64/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/sndfile/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/asio/asio/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/restinio/dev;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/fmt/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/http_parser;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/natpmp"
">"
"${CMAKE_CURRENT_SOURCE_DIR}/.;"
"${CMAKE_CURRENT_SOURCE_DIR}/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/src/client;"
@ -152,6 +141,15 @@ if(MSVC)
"${CMAKE_CURRENT_SOURCE_DIR}/compat/msvc;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/msvc/include/upnp;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/ffmpeg/Build/win32/x64/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/sndfile/src;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/asio/asio/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/restinio/dev;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/fmt/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/http_parser;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/natpmp"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/msgpack-c/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/opendht/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/libarchive/libarchive;"
@ -164,6 +162,7 @@ if(MSVC)
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/pjproject/third_party;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/pjproject/pjmedia/include"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/speexdsp/include;"
"${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/webrtc-audio-processing"
)
endif()
@ -190,6 +189,9 @@ if(MSVC)
"NOMINMAX;"
"HAVE_CONFIG_H;"
"WIN32_LEAN_AND_MEAN;"
"WEBRTC_WIN;"
"WEBRTC_AUDIO_PROCESSING_ONLY_BUILD;"
"WEBRTC_NS_FLOAT;"
)
endif()
@ -274,6 +276,7 @@ if(MSVC)
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/libcrypto.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/openssl/libssl.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/speexdsp/lib/libspeexdsp.lib
${CMAKE_CURRENT_SOURCE_DIR}/contrib/build/webrtc-audio-processing/build/Release/webrtc-audio-processing.lib
/ignore:4006
"
)

View File

@ -13,7 +13,7 @@ systems. This function is required for `alloca.c' support on those systems.
#define HAVE_ALLOCA 1
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
*/
#define HAVE_ALLOCA_H 1
/* Define if you have alsa */
@ -100,6 +100,9 @@ systems. This function is required for `alloca.c' support on those systems.
/* Define if you have libspeexdsp */
#define HAVE_SPEEXDSP 1
/* Define if you have webrtc-audio-processing */
#define HAVE_WEBRTC_AP 1
/* Define to 1 if stdbool.h conforms to C99. */
#define HAVE_STDBOOL_H 1

View File

@ -27,7 +27,13 @@
#include "audio/resampler.h"
#include "tonecontrol.h"
#include "client/ring_signal.h"
// aec
#if HAVE_WEBRTC_AP
#include "echo-cancel/webrtc_echo_canceller.h"
#else
#include "echo-cancel/null_echo_canceller.h"
#endif
#include <ctime>
#include <algorithm>
@ -118,12 +124,32 @@ AudioLayer::setHasNativeAEC(bool hasEAC)
void
AudioLayer::checkAEC()
{
std::lock_guard<std::mutex> lk(ecMutex_);
bool shouldSoftAEC = not hasNativeAEC_ and playbackStarted_ and recordStarted_;
if (not echoCanceller_ and shouldSoftAEC) {
JAMI_WARN("Starting AEC");
echoCanceller_.reset(new NullEchoCanceller(audioFormat_, audioFormat_.sample_rate / 100));
} else if (echoCanceller_ and not shouldSoftAEC) {
auto nb_channels = std::min(audioFormat_.nb_channels, audioInputFormat_.nb_channels);
auto sample_rate = std::min(audioFormat_.sample_rate, audioInputFormat_.sample_rate);
if (sample_rate % 16000u != 0)
sample_rate = 16000u * ((sample_rate / 16000u) + 1u);
sample_rate = std::clamp(sample_rate, 16000u, 96000u);
AudioFormat format {sample_rate, nb_channels};
auto frame_size = sample_rate / 100u;
JAMI_WARN("Input {%d Hz, %d channels}",
audioInputFormat_.sample_rate,
audioInputFormat_.nb_channels);
JAMI_WARN("Output {%d Hz, %d channels}", audioFormat_.sample_rate, audioFormat_.nb_channels);
JAMI_WARN("Starting AEC {%d Hz, %d channels, %d samples/frame}",
sample_rate,
nb_channels,
frame_size);
#if HAVE_WEBRTC_AP
echoCanceller_.reset(new WebRTCEchoCanceller(format, frame_size));
#else
echoCanceller_.reset(new NullEchoCanceller(format, frame_size));
#endif
} else if (echoCanceller_ and not shouldSoftAEC and not playbackStarted_
and not recordStarted_) {
JAMI_WARN("Stopping AEC");
echoCanceller_.reset();
}
@ -209,11 +235,18 @@ AudioLayer::getToPlay(AudioFormat format, size_t writableSamples)
} else if (auto buf = bufferPool.getData(RingBufferPool::DEFAULT_ID)) {
resampled = resampler_->resample(std::move(buf), format);
} else {
if (echoCanceller_) {
auto silence = std::make_shared<AudioFrame>(format, writableSamples);
libav_utils::fillWithSilence(silence->pointer());
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putPlayback(silence);
}
break;
}
if (resampled) {
if (echoCanceller_) {
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putPlayback(resampled);
}
playbackQueue_->enqueue(std::move(resampled));
@ -228,9 +261,11 @@ void
AudioLayer::putRecorded(std::shared_ptr<AudioFrame>&& frame)
{
if (echoCanceller_) {
std::lock_guard<std::mutex> lk(ecMutex_);
echoCanceller_->putRecorded(std::move(frame));
while (auto rec = echoCanceller_->getProcessed())
while (auto rec = echoCanceller_->getProcessed()) {
mainRingBuffer_->put(std::move(rec));
}
} else {
mainRingBuffer_->put(std::move(frame));
}

View File

@ -289,6 +289,7 @@ protected:
*/
std::unique_ptr<Resampler> resampler_;
std::mutex ecMutex_ {};
std::unique_ptr<EchoCanceller> echoCanceller_;
private:

View File

@ -7,6 +7,8 @@ list (APPEND Source_Files__media__audio__echo_cancel
"${CMAKE_CURRENT_SOURCE_DIR}/null_echo_canceller.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.h"
"${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.h"
"${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.cpp"
)
set (Source_Files__media__audio__echo_cancel ${Source_Files__media__audio__echo_cancel} PARENT_SCOPE)

View File

@ -10,6 +10,11 @@ EC_SRC += speex_echo_canceller.cpp
EC_HDR += speex_echo_canceller.h
endif
if HAVE_WEBRTC_AP
EC_SRC += webrtc_echo_canceller.cpp
EC_HDR += webrtc_echo_canceller.h
endif
libecho_cancel_la_SOURCES = \
$(EC_SRC)

View File

@ -22,9 +22,13 @@
#include "noncopyable.h"
#include "audio/audio_frame_resizer.h"
#include "audio/resampler.h"
#include "audio/audiobuffer.h"
#include "libav_deps.h"
#include <atomic>
#include <memory>
namespace jami {
class EchoCanceller
@ -36,19 +40,26 @@ public:
EchoCanceller(AudioFormat format, unsigned frameSize)
: playbackQueue_(format, frameSize)
, recordQueue_(format, frameSize)
, sampleRate_(format.sample_rate)
, resampler_(new Resampler)
, format_(format)
, frameSize_(frameSize)
{}
virtual ~EchoCanceller() = default;
virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf)
{
recordQueue_.enqueue(std::move(buf));
recordStarted_ = true;
if (!playbackStarted_)
return;
enqueue(recordQueue_, std::move(buf));
};
virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf)
{
auto c = buf;
playbackQueue_.enqueue(std::move(c));
playbackStarted_ = true;
if (!recordStarted_)
return;
auto copy = buf;
enqueue(playbackQueue_, std::move(copy));
};
virtual std::shared_ptr<AudioFrame> getProcessed() = 0;
virtual void done() = 0;
@ -56,8 +67,21 @@ public:
protected:
AudioFrameResizer playbackQueue_;
AudioFrameResizer recordQueue_;
unsigned sampleRate_;
std::unique_ptr<Resampler> resampler_;
std::atomic_bool playbackStarted_;
std::atomic_bool recordStarted_;
AudioFormat format_;
unsigned frameSize_;
private:
void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
{
if (buf->getFormat() != format_) {
auto resampled = resampler_->resample(std::move(buf), format_);
frameResizer.enqueue(std::move(resampled));
} else
frameResizer.enqueue(std::move(buf));
};
};
} // namespace jami

View File

@ -50,7 +50,7 @@ SpeexEchoCanceller::SpeexEchoCanceller(AudioFormat format, unsigned frameSize)
: EchoCanceller(format, frameSize)
, pimpl_(std::make_unique<SpeexEchoStateImpl>(format, frameSize))
{
speex_echo_ctl(pimpl_->state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate_);
speex_echo_ctl(pimpl_->state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
}
void

View File

@ -0,0 +1,170 @@
/*
* Copyright (C) 2021 Savoir-faire Linux Inc.
*
* Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "webrtc_echo_canceller.h"
#include <webrtc/modules/audio_processing/include/audio_processing.h>
namespace jami {
// Private implementation wrapping the webrtc::AudioProcessing module (APM).
// NOTE: this definition must precede the WebRTCEchoCanceller constructor
// below — std::make_unique<WebRTCAPMImpl> requires a complete type at the
// point of use, so defining the struct after the constructor does not compile.
struct WebRTCEchoCanceller::WebRTCAPMImpl
{
    using APMPtr = std::unique_ptr<webrtc::AudioProcessing>;
    APMPtr apm;
    // One stream configuration shared by all four APM streams (same rate
    // and channel count for capture and render).
    webrtc::StreamConfig streamConfig;

    WebRTCAPMImpl(AudioFormat format, unsigned frameSize)
        : streamConfig(format.sample_rate, format.nb_channels)
    {
        webrtc::ProcessingConfig pconfig;
        webrtc::Config config;

        // Extended filter and delay-agnostic AEC modes.
        config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
        config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));

        apm.reset(webrtc::AudioProcessing::Create(config));

        pconfig = {
            streamConfig, /* input stream */
            streamConfig, /* output stream */
            streamConfig, /* reverse input stream */
            streamConfig, /* reverse output stream */
        };

        if (apm->Initialize(pconfig) != webrtc::AudioProcessing::kNoError) {
            JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
        }

        // aec — drift compensation pairs with set_stream_drift_samples()
        // called per frame in getProcessed().
        apm->echo_cancellation()->set_suppression_level(
            webrtc::EchoCancellation::SuppressionLevel::kModerateSuppression);
        apm->echo_cancellation()->enable_drift_compensation(true);
        apm->echo_cancellation()->Enable(true);

        // hpf
        apm->high_pass_filter()->Enable(true);

        // ns
        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
        apm->noise_suppression()->Enable(true);

        // agc — adaptive analog mode; the analog level is exchanged with the
        // APM every frame in getProcessed().
        apm->gain_control()->set_analog_level_limits(0, 255);
        apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
        apm->gain_control()->Enable(true);
    }
};

// Construct the canceller. The base class sets up the record/playback
// resizer queues at `frameSize` samples per frame; the float buffers hold
// one deinterleaved frame per channel for the APM's planar-float API.
WebRTCEchoCanceller::WebRTCEchoCanceller(AudioFormat format, unsigned frameSize)
    : EchoCanceller(format, frameSize)
    , pimpl_(std::make_unique<WebRTCAPMImpl>(format, frameSize))
    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
    , iRecordBuffer_(frameSize_, format)
    , iPlaybackBuffer_(frameSize_, format)
{}
// Feed one captured (near-end) frame into the record queue.
// Simply delegates to the base-class implementation.
void
WebRTCEchoCanceller::putRecorded(std::shared_ptr<AudioFrame>&& buf)
{
    auto frame = std::move(buf);
    EchoCanceller::putRecorded(std::move(frame));
}
// Feed one playback (far-end) frame into the playback queue.
// Simply delegates to the base-class implementation.
void
WebRTCEchoCanceller::putPlayback(const std::shared_ptr<AudioFrame>& buf)
{
    const auto& frame = buf;
    EchoCanceller::putPlayback(frame);
}
// Pull one processed (echo-cancelled) frame, or an empty pointer if there
// is not yet a full frame buffered on both the record and playback queues.
//
// Per-frame pipeline:
//  1. trim each queue down if it has accumulated more than 10 frames,
//     keeping latency bounded
//  2. deinterleave the playback frame to planar float and feed it to the
//     APM render path (ProcessReverseStream)
//  3. deinterleave the record frame to planar float and run the capture
//     path (ProcessStream) — AEC/HPF/NS/AGC as configured in WebRTCAPMImpl
//  4. convert the result back to interleaved s16 and return it
std::shared_ptr<AudioFrame>
WebRTCEchoCanceller::getProcessed()
{
    // Drop stale audio so the queues never grow past ~10 frames.
    while (recordQueue_.samples() > recordQueue_.frameSize() * 10) {
        JAMI_DBG("record overflow %d / %d", recordQueue_.samples(), frameSize_);
        recordQueue_.dequeue();
    }
    while (playbackQueue_.samples() > playbackQueue_.frameSize() * 10) {
        JAMI_DBG("playback overflow %d / %d", playbackQueue_.samples(), frameSize_);
        playbackQueue_.dequeue();
    }
    if (recordQueue_.samples() < recordQueue_.frameSize()
        || playbackQueue_.samples() < playbackQueue_.frameSize()) {
        // If there are not enough samples in either queue, we can't
        // process anything.
        // JAMI_DBG("underrun p:%d / r:%d", playbackQueue_.samples(), recordQueue_.samples());
        return {};
    }

    // Sample-count difference between the two streams, handed to the APM's
    // drift compensation (enabled in WebRTCAPMImpl).
    int driftSamples = playbackQueue_.samples() - recordQueue_.samples();

    auto playback = playbackQueue_.dequeue();
    auto record = recordQueue_.dequeue();
    if (!playback || !record)
        return {};

    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);

    webrtc::StreamConfig& sc = pimpl_->streamConfig;

    // analyze deinterleaved float playback data
    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
                                  frameSize_,
                                  format_.nb_channels);
    std::vector<float*> playData {format_.nb_channels};
    // unsigned loop index: nb_channels is unsigned, avoid a signed/unsigned
    // comparison (the original used `auto c = 0`, i.e. int).
    for (unsigned c = 0; c < format_.nb_channels; ++c) {
        playData[c] = fPlaybackBuffer_[c].data();
        iPlaybackBuffer_.channelToFloat(playData[c], c);
    }
    if (pimpl_->apm->ProcessReverseStream(playData.data(), sc, sc, playData.data())
        != webrtc::AudioProcessing::kNoError)
        JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");

    // process deinterleaved float recorded data
    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
                                frameSize_,
                                format_.nb_channels);
    std::vector<float*> recData {format_.nb_channels};
    for (unsigned c = 0; c < format_.nb_channels; ++c) {
        recData[c] = fRecordBuffer_[c].data();
        iRecordBuffer_.channelToFloat(recData[c], c);
    }

    // TODO: implement this correctly (it MUST be called prior to ProcessStream)
    // delay = (t_render - t_analyze) + (t_process - t_capture)
    pimpl_->apm->set_stream_delay_ms(0);

    // Hand the current analog mic level to the AGC, then read back the
    // level it recommends after processing.
    pimpl_->apm->gain_control()->set_stream_analog_level(analogLevel_);
    pimpl_->apm->echo_cancellation()->set_stream_drift_samples(driftSamples);
    if (pimpl_->apm->ProcessStream(recData.data(), sc, sc, recData.data())
        != webrtc::AudioProcessing::kNoError)
        JAMI_ERR("[webrtc-ap] ProcessStream failed");
    analogLevel_ = pimpl_->apm->gain_control()->stream_analog_level();

    // return interleaved s16 data
    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
                                                frameSize_,
                                                format_.nb_channels);
    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
    return processed;
}
// Intentionally a no-op: there is no per-session teardown here; the APM and
// its resources are released when pimpl_ is destroyed with the canceller.
void
WebRTCEchoCanceller::done()
{}
} // namespace jami

View File

@ -0,0 +1,53 @@
/*
* Copyright (C) 2021 Savoir-faire Linux Inc.
*
* Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#pragma once
#include "audio/echo-cancel/echo_canceller.h"
#include "audio/audio_frame_resizer.h"
#include <memory>
namespace jami {
class WebRTCEchoCanceller final : public EchoCanceller
{
public:
WebRTCEchoCanceller(AudioFormat format, unsigned frameSize);
~WebRTCEchoCanceller() = default;
// Inherited via EchoCanceller
void putRecorded(std::shared_ptr<AudioFrame>&& buf) override;
void putPlayback(const std::shared_ptr<AudioFrame>& buf) override;
std::shared_ptr<AudioFrame> getProcessed() override;
void done() override;
private:
struct WebRTCAPMImpl;
std::unique_ptr<WebRTCAPMImpl> pimpl_;
using fChannelBuffer = std::vector<std::vector<float>>;
fChannelBuffer fRecordBuffer_;
fChannelBuffer fPlaybackBuffer_;
AudioBuffer iRecordBuffer_;
AudioBuffer iPlaybackBuffer_;
int analogLevel_ {0};
};
} // namespace jami

View File

@ -91,6 +91,8 @@ PortAudioLayer::PortAudioLayer(const AudioPreference& pref)
: AudioLayer {pref}
, pimpl_ {new PortAudioLayerImpl(*this, pref)}
{
setHasNativeAEC(false);
auto numDevices = Pa_GetDeviceCount();
if (numDevices < 0) {
JAMI_ERR("Pa_CountDevices returned 0x%x", numDevices);