audio: refactor resampler class

Using MediaFilter makes resampling slower and less predictable than using libswresample directly. Adds libswresample as a dependency. Simplifies the Resampler class. Resampler now detects changes in the input and output formats automatically. Changes AudioFile to use Resampler instead of MediaFilter. Change-Id: I24919e8fa514dbb4a38408e338016976e7424136
2025-08-12 22:09:25 +08:00 · 2018-08-01 16:52:39 -04:00
parent 034fd6424f
commit 20b631fb78
9 changed files with 91 additions and 109 deletions
--- a/configure.ac
+++ b/configure.ac
@ -446,6 +446,8 @@ PKG_CHECK_MODULES(LIBAVFILTER, libavfilter >= 5.40.101,, AC_MSG_ERROR([Missing l

 PKG_CHECK_MODULES(LIBSWSCALE, libswscale >= 3.1.101,, AC_MSG_ERROR([Missing libswscale development files]))

+PKG_CHECK_MODULES(LIBSWRESAMPLE, libswresample >= 1.2.101,, AC_MSG_ERROR([Missing libswresample development files]))
+
 dnl Video is default-enabled
 AC_ARG_ENABLE([video], AS_HELP_STRING([--disable-video], [Disable video]))

--- a/src/media/Makefile.am
+++ b/src/media/Makefile.am
@ -49,12 +49,12 @@ libmedia_la_libADD = \
 	./video/libvideo.la
 endif

-libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@
+libmedia_la_LDFLAGS = @LIBAVCODEC_LIBS@ @LIBAVFORMAT_LIBS@ @LIBAVDEVICE_LIBS@ @LIBAVFILTER_LIBS@ @LIBSWRESAMPLE_LIBS@ @LIBSWSCALE_LIBS@ @LIBAVUTIL_LIBS@

 if HAVE_WIN32
 libmedia_la_LDFLAGS += -lws2_32 -lwsock32 -lshlwapi
 endif

-AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@
+AM_CFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@

-AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWSCALE_CFLAGS@
+AM_CXXFLAGS=@LIBAVCODEC_CFLAGS@ @LIBAVFORMAT_CFLAGS@ @LIBAVDEVICE_CFLAGS@ @LIBAVFILTER_CFLAGS@ @LIBSWRESAMPLE_CFLAGS@ @LIBSWSCALE_CFLAGS@
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@ -179,7 +179,7 @@ AudioSender::process()
    if (mainBuffFormat.sample_rate != accountAudioCodec->audioformat.sample_rate) {
        if (not resampler_) {
            RING_DBG("Creating audio resampler");
-            resampler_.reset(new Resampler(accountAudioCodec->audioformat));
+            resampler_.reset(new Resampler);
        }
        resampledData_.setFormat(accountAudioCodec->audioformat);
        resampledData_.resize(samplesToGet);
--- a/src/media/audio/audiolayer.cpp
+++ b/src/media/audio/audiolayer.cpp
@ -41,8 +41,8 @@ AudioLayer::AudioLayer(const AudioPreference &pref)
    , audioFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
    , audioInputFormat_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
    , urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_)
-    , resampler_(new Resampler{audioFormat_.sample_rate})
-    , inputResampler_(new Resampler{audioInputFormat_.sample_rate})
+    , resampler_(new Resampler)
+    , inputResampler_(new Resampler)
    , lastNotificationTime_()
 {
    urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID);
@ -57,13 +57,11 @@ void AudioLayer::hardwareFormatAvailable(AudioFormat playback)
    RING_DBG("Hardware audio format available : %s", playback.toString().c_str());
    audioFormat_ = Manager::instance().hardwareAudioFormatChanged(playback);
    urgentRingBuffer_.setFormat(audioFormat_);
-    resampler_->setFormat(audioFormat_);
 }

 void AudioLayer::hardwareInputFormatAvailable(AudioFormat capture)
 {
    RING_DBG("Hardware input audio format available : %s", capture.toString().c_str());
-    inputResampler_->setFormat(capture);
 }

 void AudioLayer::devicesChanged()
--- a/src/media/audio/resampler.cpp
+++ b/src/media/audio/resampler.cpp
@ -19,82 +19,81 @@
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
 */

+#include "libav_deps.h"
 #include "logger.h"
+#include "media_buffer.h"
 #include "media_filter.h"
 #include "media_stream.h"
 #include "resampler.h"
 #include "ring_types.h"

+extern "C" {
+#include <libswresample/swresample.h>
+}
+
 namespace ring {

-Resampler::Resampler(AudioFormat format)
-    : format_(format)
-{
-    setFormat(format);
-}
+Resampler::Resampler()
+    : swrCtx_(swr_alloc())
+{}

-Resampler::Resampler(unsigned sample_rate, unsigned channels)
-    : format_(sample_rate, channels)
+Resampler::~Resampler()
 {
-    setFormat(format_);
+    swr_free(&swrCtx_);
 }

-Resampler::~Resampler() = default;
-
 void
-Resampler::reinitFilter(const MediaStream& inputParams)
+Resampler::reinit(const AudioFormat& in, const int inSampleFmt,
+                  const AudioFormat& out, const int outSampleFmt)
 {
-    filter_.reset(new MediaFilter());
-    std::stringstream aformat;
-    aformat << "aformat=sample_fmts=s16:channel_layouts="
-        << av_get_default_channel_layout(format_.nb_channels)
-        << ":sample_rates=" << format_.sample_rate;
-    if (filter_->initialize(aformat.str(), inputParams) < 0) {
-        RING_ERR() << "Failed to initialize resampler";
-        filter_.reset();
+    av_opt_set_int(swrCtx_, "ich", 0, 0);
+    av_opt_set_int(swrCtx_, "icl", av_get_default_channel_layout(in.nb_channels), 0);
+    av_opt_set_int(swrCtx_, "isr", in.sample_rate, 0);
+    av_opt_set_sample_fmt(swrCtx_, "isf", static_cast<AVSampleFormat>(inSampleFmt), 0);
+
+    av_opt_set_int(swrCtx_, "och", 0, 0);
+    av_opt_set_int(swrCtx_, "ocl", av_get_default_channel_layout(out.nb_channels), 0);
+    av_opt_set_int(swrCtx_, "osr", out.sample_rate, 0);
+    av_opt_set_sample_fmt(swrCtx_, "osf", static_cast<AVSampleFormat>(outSampleFmt), 0);
+
+    swr_init(swrCtx_);
+}
+
+int
+Resampler::resample(const AVFrame* input, AVFrame* output)
+{
+    int ret = swr_convert_frame(swrCtx_, output, input);
+    if (ret & AVERROR_INPUT_CHANGED || ret & AVERROR_OUTPUT_CHANGED) {
+        reinit(AudioFormat{(unsigned)input->sample_rate, (unsigned)input->channels}, input->format,
+               AudioFormat{(unsigned)output->sample_rate, (unsigned)output->channels}, output->format);
+        return resample(input, output);
+    } else if (ret < 0) {
+        RING_ERR() << "Failed to resample frame";
+        return -1;
    }
-}

-void
-Resampler::setFormat(AudioFormat format)
-{
-    format_ = format;
-    if (filter_)
-        reinitFilter(filter_->getInputParams());
+    return 0;
 }

 void
 Resampler::resample(const AudioBuffer& dataIn, AudioBuffer& dataOut)
 {
    auto input = dataIn.toAVFrame();
-    MediaStream currentParams("resampler", static_cast<AVSampleFormat>(input->format),
-        0, input->sample_rate, input->channels);
-    if (filter_) {
-        const auto& ms = filter_->getInputParams();
-        if (ms.sampleRate != input->sample_rate || ms.nbChannels != input->channels) {
-            RING_WARN() << "Resampler settings changed, reinitializing";
-            reinitFilter(currentParams);
-        }
-    } else {
-        reinitFilter(currentParams);
-    }
+    AudioFrame resampled;
+    auto output = resampled.pointer();
+    output->sample_rate = dataOut.getSampleRate();
+    output->channel_layout = av_get_default_channel_layout(dataOut.channels());
+    output->format = AV_SAMPLE_FMT_S16;

-    auto frame = filter_->apply(input);
-    av_frame_free(&input);
-    if (!frame) {
-        RING_ERR() << "Resampling failed, this may produce a glitch in the audio";
+    if (resample(input, output) < 0) {
+        av_frame_free(&input);
        return;
    }

-    dataOut.setFormat(format_);
-    dataOut.resize(frame->nb_samples);
-    if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_FLTP)
-        dataOut.convertFloatPlanarToSigned16(frame->extended_data,
-            frame->nb_samples, frame->channels);
-    else if (static_cast<AVSampleFormat>(frame->format) == AV_SAMPLE_FMT_S16)
-        dataOut.deinterleave(reinterpret_cast<const AudioSample*>(frame->extended_data[0]),
-            frame->nb_samples, frame->channels);
-    av_frame_free(&frame);
+    dataOut.resize(output->nb_samples);
+    dataOut.deinterleave(reinterpret_cast<const AudioSample*>(output->extended_data[0]),
+        output->nb_samples, output->channels);
+    av_frame_free(&input);
 }

 } // namespace ring
--- a/src/media/audio/resampler.h
+++ b/src/media/audio/resampler.h
@ -21,51 +21,48 @@

 #pragma once

-#include <memory>
-
 #include "audiobuffer.h"
 #include "noncopyable.h"
 #include "ring_types.h"

+struct AVFrame;
+struct SwrContext;
+
 namespace ring {

-class MediaFilter;
-struct MediaStream;
-
+/**
+ * Wrapper class for libswresample
+ */
 class Resampler {
    public:
-        /**
-         * Resampler is used for several situations:
-        * streaming conversion (RTP, IAX), audiolayer conversion,
-        * audio files conversion. Parameters are used to compute
-        * internal buffer size. Resampler must be reinitialized
-        * every time these parameters change
-        */
-        Resampler(AudioFormat outFormat);
-        Resampler(unsigned sample_rate, unsigned channels=1);
-        // empty dtor, needed for unique_ptr
+        Resampler();
        ~Resampler();

        /**
-         * Change the converter sample rate and channel number.
-         * Internal state is lost.
+         * Resample from @input format to @output format.
+         * NOTE: sample_rate, channel_layout, and format should be set on @output
         */
-        void setFormat(AudioFormat format);
+        int resample(const AVFrame* input, AVFrame* output);

        /**
-         * resample from the samplerate1 to the samplerate2
-         * @param dataIn Input buffer
-         * @param dataOut Output buffer
+         * Resample from @dataIn format to @dataOut format.
+         *
+         * NOTE: This is a wrapper for resample(AVFrame*, AVFrame*)
         */
        void resample(const AudioBuffer& dataIn, AudioBuffer& dataOut);

    private:
        NON_COPYABLE(Resampler);

-        void reinitFilter(const MediaStream& inputParams);
+        /**
+         * Reinitializes the resampler when new settings are detected. As long as the input and
+         * output buffers each keep the same format across calls, this method is never called,
+         * because the first initialization is done in swr_convert_frame.
+         */
+        void reinit(const AudioFormat& in, const int inSampleFmt,
+                    const AudioFormat& out, const int outSampleFmt);

-        AudioFormat format_; // number of channels and max output frequency
-        std::unique_ptr<MediaFilter> filter_;
+        SwrContext* swrCtx_; // incomplete type, cannot be a unique_ptr
 };

 } // namespace ring
--- a/src/media/audio/sound/audiofile.cpp
+++ b/src/media/audio/sound/audiofile.cpp
@ -69,32 +69,22 @@ AudioFile::AudioFile(const std::string &fileName, unsigned int sampleRate) :
    if (decoder->setupFromAudioData() < 0)
        throw AudioFileException("Decoder setup failed: " + fileName);

-    const auto& ms = decoder->getStream();
-
-    auto filter = std::make_unique<MediaFilter>();
-    // aformat=sample_fmts=s16:channel_layouts=stereo
-    if (filter->initialize("aformat=sample_fmts=s16:channel_layouts=stereo|mono:sample_rates="
-        + std::to_string(getFormat().sample_rate), ms) < 0)
-        throw AudioFileException("Failed to create resampler");
-
+    auto resampler = std::make_unique<Resampler>();
    auto buf = std::make_unique<AudioBuffer>(0, getFormat());
    bool done = false;
    while (!done) {
-        AudioFrame frame;
-        AVFrame* resampled;
-        switch (decoder->decode(frame)) {
+        AudioFrame input;
+        AudioFrame output;
+        auto resampled = output.pointer();
+        switch (decoder->decode(input)) {
        case MediaDecoder::Status::FrameFinished:
-            // TODO move this code to Resampler class with conditional resampling
-            if (filter->feedInput(frame.pointer()) < 0)
+            resampled->sample_rate = getFormat().sample_rate;
+            resampled->channel_layout = av_get_default_channel_layout(getFormat().nb_channels);
+            resampled->format = AV_SAMPLE_FMT_S16;
+            if (resampler->resample(input.pointer(), resampled) < 0)
                throw AudioFileException("Frame could not be resampled");
-            if (!(resampled = filter->readOutput()))
-                throw AudioFileException("Frame could not be resampled");
-            if (buf->append(resampled) < 0) {
-                av_frame_free(&resampled);
+            if (buf->append(resampled) < 0)
                throw AudioFileException("Error while decoding: " + fileName);
-            } else {
-                av_frame_free(&resampled);
-            }
            break;
        case MediaDecoder::Status::DecodeError:
        case MediaDecoder::Status::ReadError:
--- a/src/media/media_decoder.cpp
+++ b/src/media/media_decoder.cpp
@ -463,7 +463,7 @@ MediaDecoder::writeToRingBuffer(const AudioFrame& decodedFrame,
    if ((unsigned)libav_frame->sample_rate != outFormat.sample_rate) {
        if (!resampler_) {
            RING_DBG("Creating audio resampler");
-            resampler_.reset(new Resampler(outFormat));
+            resampler_.reset(new Resampler);
        }
        resamplingBuff_.setFormat({(unsigned) outFormat.sample_rate, (unsigned) decoderCtx_->channels});
        resamplingBuff_.resize(libav_frame->nb_samples);
--- a/test/unitTest/media/audio/test_resampler.cpp
+++ b/test/unitTest/media/audio/test_resampler.cpp
@ -44,8 +44,6 @@ private:
    CPPUNIT_TEST(testResample);
    CPPUNIT_TEST_SUITE_END();

-    void writeWav(); // writes a minimal wav file to test decoding
-
    std::unique_ptr<Resampler> resampler_;
 };

@ -71,12 +69,10 @@ ResamplerTest::testResample()
    const constexpr AudioFormat infmt(44100, 1);
    const constexpr AudioFormat outfmt(48000, 2);

-    resampler_.reset(new Resampler(none));
-
-    resampler_->setFormat(outfmt);
+    resampler_.reset(new Resampler);

    AudioBuffer inbuf(1024, infmt);
-    AudioBuffer outbuf;
+    AudioBuffer outbuf(0, outfmt);

    resampler_->resample(inbuf, outbuf);
    CPPUNIT_ASSERT(outbuf.getFormat().sample_rate == 48000);