From 005c6ffda31693a25cc65aaef19b526daf5cc9c8 Mon Sep 17 00:00:00 2001
From: philippegorley <philippe.gorley@savoirfairelinux.com>
Date: Thu, 25 Oct 2018 13:28:21 -0400
Subject: [PATCH] audio: add file streaming support

When sharing a file, its audio will also be decoded and sent. Audio and
video are synchronized for the first loop of the file only. Further
loops desynchronize them as the audio and video are decoded separately
instead of with the same decoder.

Device audio is muted while streaming. Mixing of the microphone with the
file audio will be added in a later patch.

Adds common audio decoders for file streaming.

Change-Id: Id0593ce4f2d32c249eb7a9672b0091c0d6e07a00
---
 contrib/src/ffmpeg/rules.mak        |   4 +
 src/media/audio/audio_input.cpp     | 137 +++++++++++++++++++++++++---
 src/media/audio/audio_input.h       |  14 ++-
 src/media/audio/audio_rtp_session.h |   2 +-
 4 files changed, 144 insertions(+), 13 deletions(-)
diff --git a/contrib/src/ffmpeg/rules.mak b/contrib/src/ffmpeg/rules.mak
index 119ffb6a3..29ad7a3d8 100644
--- a/contrib/src/ffmpeg/rules.mak
+++ b/contrib/src/ffmpeg/rules.mak
@@ -74,6 +74,10 @@ FFMPEGCONF += \
 FFMPEGCONF += \
 	--enable-decoder=flac \
 	--enable-decoder=vorbis \
+	--enable-decoder=aac \
+	--enable-decoder=ac3 \
+	--enable-decoder=eac3 \
+	--enable-decoder=mp3 \
 	--enable-decoder=pcm_u24be \
 	--enable-decoder=pcm_u24le \
 	--enable-decoder=pcm_u32be \
diff --git a/src/media/audio/audio_input.cpp b/src/media/audio/audio_input.cpp
index 96a9bc3f7..ff2aa95c7 100644
--- a/src/media/audio/audio_input.cpp
+++ b/src/media/audio/audio_input.cpp
@@ -19,9 +19,12 @@
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
  */
 
-#include "dring/media_const.h"
+#include "audio_frame_resizer.h"
 #include "audio_input.h"
+#include "dring/media_const.h"
+#include "fileutils.h" // access
 #include "manager.h"
+#include "media_decoder.h"
 #include "resampler.h"
 #include "ringbufferpool.h"
 #include "smartools.h"
@@ -37,7 +40,10 @@ static constexpr auto MS_PER_PACKET = std::chrono::milliseconds(20);
 AudioInput::AudioInput(const std::string& id) :
     id_(id),
     format_(Manager::instance().getRingBufferPool().getInternalAudioFormat()),
+    frameSize_(format_.sample_rate * MS_PER_PACKET.count() / 1000),
     resampler_(new Resampler),
+    resizer_(new AudioFrameResizer(format_, frameSize_,
+       [this](std::shared_ptr<AudioFrame>&& f){ frameResized(std::move(f)); })),
     loop_([] { return true; },
           [this] { process(); },
           [] {})
@@ -62,10 +68,17 @@ AudioInput::process()
             RING_DBG() << "Switching audio input to '" << devOpts_.input << "'";
     }
 
-    auto frame = std::make_shared<AudioFrame>();
-    if (!nextFromDevice(*frame))
-        return; // no frame
+    // send frame to resizer, frameResized will be called when it can be output
+    if (decodingFile_)
+        nextFromFile();
+    else
+        nextFromDevice();
+}
 
+void
+AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr)
+{
+    std::shared_ptr<AudioFrame> frame = std::move(ptr);
     auto ms = MediaStream("a:local", format_, sent_samples);
     frame->pointer()->pts = sent_samples;
     sent_samples += frame->pointer()->nb_samples;
@@ -73,8 +86,8 @@ AudioInput::process()
     notify(std::static_pointer_cast<MediaFrame>(frame));
 }
 
-bool
-AudioInput::nextFromDevice(AudioFrame& frame)
+void
+AudioInput::nextFromDevice()
 {
     auto& mainBuffer = Manager::instance().getRingBufferPool();
     auto bufferFormat = mainBuffer.getInternalAudioFormat();
@@ -85,7 +98,7 @@ AudioInput::nextFromDevice(AudioFrame& frame)
 
     if (mainBuffer.availableForGet(id_) < samplesToGet
         && not mainBuffer.waitForDataAvailable(id_, samplesToGet, MS_PER_PACKET)) {
-        return false;
+        return;
     }
 
     // getData resets the format to internal hardware format, will have to be resampled
@@ -93,7 +106,7 @@ AudioInput::nextFromDevice(AudioFrame& frame)
     micData_.resize(samplesToGet);
     const auto samples = mainBuffer.getData(micData_, id_);
     if (samples != samplesToGet)
-        return false;
+        return;
 
     if (muteState_) // audio is muted, set samples to 0
         micData_.reset();
@@ -108,8 +121,45 @@ AudioInput::nextFromDevice(AudioFrame& frame)
     }
 
     auto audioFrame = resampled.toAVFrame();
-    frame.copyFrom(*audioFrame);
-    return true;
+    resizer_->enqueue(std::move(audioFrame));
+}
+
+void
+AudioInput::nextFromFile()
+{
+    if (!decoder_)
+        return;
+
+    auto frame = std::make_unique<AudioFrame>();
+    const auto ret = decoder_->decode(*frame);
+    const auto inFmt = AudioFormat((unsigned)frame->pointer()->sample_rate, (unsigned)frame->pointer()->channels, (AVSampleFormat)frame->pointer()->format);
+
+    std::lock_guard<std::mutex> lk(fmtMutex_);
+    switch(ret) {
+    case MediaDecoder::Status::ReadError:
+    case MediaDecoder::Status::DecodeError:
+        RING_ERR() << "Failed to decode frame";
+        break;
+    case MediaDecoder::Status::RestartRequired:
+    case MediaDecoder::Status::EOFError:
+        createDecoder();
+        break;
+    case MediaDecoder::Status::FrameFinished:
+        if (inFmt != format_) {
+            AudioFrame out;
+            out.pointer()->format = format_.sampleFormat;
+            out.pointer()->sample_rate = format_.sample_rate;
+            out.pointer()->channel_layout = av_get_default_channel_layout(format_.nb_channels);
+            out.pointer()->channels = format_.nb_channels;
+            resampler_->resample(frame->pointer(), out.pointer());
+            frame->copyFrom(out);
+        }
+        resizer_->enqueue(std::move(frame));
+        break;
+    case MediaDecoder::Status::Success:
+    default:
+        break;
+    }
 }
 
 bool
@@ -122,6 +172,22 @@ AudioInput::initDevice(const std::string& device)
     return true;
 }
 
+bool
+AudioInput::initFile(const std::string& path)
+{
+    if (access(path.c_str(), R_OK) != 0) {
+        RING_ERR() << "File '" << path << "' not available";
+        return false;
+    }
+
+    devOpts_ = {};
+    devOpts_.input = path;
+    devOpts_.loop = "1";
+    createDecoder(); // sets devOpts_'s sample rate and number of channels
+    decodingFile_ = true;
+    return true; // all required info found
+}
+
 std::shared_future<DeviceParams>
 AudioInput::switchInput(const std::string& resource)
 {
@@ -135,6 +201,9 @@ AudioInput::switchInput(const std::string& resource)
 
     RING_DBG() << "Switching audio source to match '" << resource << "'";
 
+    decoder_.reset();
+    decodingFile_ = false;
+
     currentResource_ = resource;
     devOptsFound_ = false;
 
@@ -159,7 +228,13 @@ AudioInput::switchInput(const std::string& resource)
         return {};
 
     const auto suffix = resource.substr(pos + sep.size());
-    if (initDevice(suffix))
+    bool ready = false;
+    if (prefix == DRing::Media::VideoProtocolPrefix::FILE)
+        ready = initFile(suffix);
+    else
+        ready = initDevice(suffix);
+
+    if (ready)
         foundDevOpts(devOpts_);
 
     switchPending_ = true;
@@ -176,11 +251,51 @@ AudioInput::foundDevOpts(const DeviceParams& params)
     }
 }
 
+void
+AudioInput::createDecoder()
+{
+    decoder_.reset();
+    if (devOpts_.input.empty()) {
+        foundDevOpts(devOpts_);
+        return;
+    }
+
+    // NOTE createDecoder is currently only used for files, which require rate emulation
+    auto decoder = std::make_unique<MediaDecoder>();
+    decoder->emulateRate();
+    decoder->setInterruptCallback(
+        [](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
+        this);
+
+    if (decoder->openInput(devOpts_) < 0) {
+        RING_ERR() << "Could not open input '" << devOpts_.input << "'";
+        foundDevOpts(devOpts_);
+        return;
+    }
+
+    if (decoder->setupFromAudioData() < 0) {
+        RING_ERR() << "Could not setup decoder for '" << devOpts_.input << "'";
+        foundDevOpts(devOpts_);
+        return;
+    }
+
+    auto ms = decoder->getStream(devOpts_.input);
+    devOpts_.channel = ms.nbChannels;
+    devOpts_.framerate = ms.sampleRate;
+    RING_DBG() << "Created audio decoder: " << ms;
+
+    decoder_ = std::move(decoder);
+    foundDevOpts(devOpts_);
+}
+
 void
 AudioInput::setFormat(const AudioFormat& fmt)
 {
     std::lock_guard<std::mutex> lk(fmtMutex_);
     format_ = fmt;
+    frameSize_ = format_.sample_rate * MS_PER_PACKET.count() / 1000;
+    resizer_.reset(new AudioFrameResizer(format_, frameSize_,
+       [this](std::shared_ptr<AudioFrame>&& f){ frameResized(std::move(f)); }));
 }
 
 void
diff --git a/src/media/audio/audio_input.h b/src/media/audio/audio_input.h
index 5d442fb2b..071adf2b4 100644
--- a/src/media/audio/audio_input.h
+++ b/src/media/audio/audio_input.h
@@ -33,6 +33,9 @@
 
 namespace ring {
 
+class AudioFrameResizer;
+class MediaDecoder;
+class MediaRecorder;
 struct MediaStream;
 class Resampler;
 
@@ -50,8 +53,12 @@ public:
     MediaStream getInfo() const;
 
 private:
-    bool nextFromDevice(AudioFrame& frame);
+    void nextFromDevice();
+    void nextFromFile();
     bool initDevice(const std::string& device);
+    bool initFile(const std::string& path);
+    void createDecoder();
+    void frameResized(std::shared_ptr<AudioFrame>&& ptr);
 
     std::string id_;
     AudioBuffer micData_;
@@ -59,8 +66,12 @@ private:
     uint64_t sent_samples = 0;
     mutable std::mutex fmtMutex_ {};
     AudioFormat format_;
+    int frameSize_;
 
     std::unique_ptr<Resampler> resampler_;
+    std::unique_ptr<AudioFrameResizer> resizer_;
+    std::weak_ptr<MediaRecorder> recorder_;
+    std::unique_ptr<MediaDecoder> decoder_;
 
     std::string currentResource_;
     std::atomic_bool switchPending_ {false};
@@ -69,6 +80,7 @@ private:
     std::shared_future<DeviceParams> futureDevOpts_;
     std::atomic_bool devOptsFound_ {false};
     void foundDevOpts(const DeviceParams& params);
+    std::atomic_bool decodingFile_ {false};
 
     ThreadLoop loop_;
     void process();
diff --git a/src/media/audio/audio_rtp_session.h b/src/media/audio/audio_rtp_session.h
index eea607156..9710c5a7e 100644
--- a/src/media/audio/audio_rtp_session.h
+++ b/src/media/audio/audio_rtp_session.h
@@ -24,8 +24,8 @@
 
 #include "audiobuffer.h"
 #include "media_device.h"
-#include "threadloop.h"
 #include "rtp_session.h"
+#include "threadloop.h"
 
 #include <string>
 #include <memory>