From 7346f0be8ac8d4c771851beaaee42af7e8e8a143 Mon Sep 17 00:00:00 2001
From: Garrett Singer <gesinger@gmail.com>
Date: Tue, 28 Feb 2017 17:51:51 -0500
Subject: [PATCH 1/2] Fix silence insertion to not insert extra frames when
 audio is offset from video

---
 lib/mp4/transmuxer.js   | 12 +++---
 test/transmuxer.test.js | 89 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/lib/mp4/transmuxer.js b/lib/mp4/transmuxer.js
index aa157788..0eca7840 100644
--- a/lib/mp4/transmuxer.js
+++ b/lib/mp4/transmuxer.js
@@ -215,14 +215,16 @@ AudioSegmentStream = function(track) {
     }
 
     baseMediaDecodeTimeTs = clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate);
+    // determine frame clock duration based on sample rate, round up to avoid overfills
+    frameDuration = Math.ceil(ONE_SECOND_IN_TS / (track.samplerate / 1024));
 
     if (audioAppendStartTs &&
         videoBaseMediaDecodeTime &&
-        // old audio doesn't overlap with new audio
-        audioAppendStartTs < baseMediaDecodeTimeTs) {
-      audioGapDuration = baseMediaDecodeTimeTs - videoBaseMediaDecodeTime;
-      // determine frame clock duration based on sample rate, round up to avoid overfills
-      frameDuration = Math.ceil(ONE_SECOND_IN_TS / (track.samplerate / 1024));
+        // gap is at least a frame duration
+        baseMediaDecodeTimeTs - audioAppendStartTs >= frameDuration) {
+      // insert the shortest possible amount (audio gap or audio to video gap)
+      audioGapDuration =
+        baseMediaDecodeTimeTs - Math.max(audioAppendStartTs, videoBaseMediaDecodeTime);
       // number of full frames in the audio gap
       audioFillFrameCount = Math.floor(audioGapDuration / frameDuration);
       // ensure gap is a whole number of frames
diff --git a/test/transmuxer.test.js b/test/transmuxer.test.js
index a66efaca..d1827497 100644
--- a/test/transmuxer.test.js
+++ b/test/transmuxer.test.js
@@ -2411,6 +2411,55 @@ QUnit.test('fills audio gaps with existing frame if odd sample rate', function()
                'filled all but frame remainder between video start and audio start');
 });
 
+QUnit.test('fills audio gaps with smaller of audio gap and audio-video gap', function() {
+  var
+    events = [],
+    boxes,
+    offsetSeconds = clock.videoTsToSeconds(111),
+    videoGap = 0.29,
+    sampleRate = 44100,
+    frameDuration = Math.ceil(90e3 / (sampleRate / 1024)),
+    frameSeconds = clock.videoTsToSeconds(frameDuration),
+    // the audio gap is the smaller of the two gaps, so it alone determines the fill
+    numSilentFrames = 1,
+    // one frame plus the offset, with a small margin for floating point imprecision
+    audioGap = frameSeconds + offsetSeconds + 0.001,
+    oldAudioEnd = 10.5,
+    audioBMDT;
+
+  audioSegmentStream.on('data', function(event) {
+    events.push(event);
+  });
+
+  audioSegmentStream.setAudioAppendStart(clock.secondsToVideoTs(oldAudioEnd));
+  audioSegmentStream.setVideoBaseMediaDecodeTime(clock.secondsToVideoTs(10 + videoGap));
+
+  audioSegmentStream.push({
+    channelcount: 2,
+    samplerate: sampleRate,
+    pts: clock.secondsToVideoTs(oldAudioEnd + audioGap),
+    dts: clock.secondsToVideoTs(oldAudioEnd + audioGap),
+    data: new Uint8Array([1])
+  });
+
+  audioSegmentStream.flush();
+
+  QUnit.equal(events.length, 1, 'a data event fired');
+  QUnit.equal(events[0].track.samples.length, 1 + numSilentFrames, 'generated samples');
+  QUnit.equal(events[0].track.samples[0].size, 364, 'silent sample');
+  QUnit.equal(events[0].track.samples[1].size, 1, 'normal sample');
+  boxes = mp4.tools.inspect(events[0].boxes);
+
+  audioBMDT = boxes[0].boxes[1].boxes[1].baseMediaDecodeTime;
+
+  QUnit.equal(
+    Math.floor(clock.secondsToVideoTs(oldAudioEnd + audioGap) -
+               clock.audioTsToVideoTs(audioBMDT, sampleRate) -
+               clock.secondsToVideoTs(offsetSeconds)),
+    Math.floor(frameDuration + 0.001),
+    'filled length of audio gap only');
+});
+
 QUnit.test('does not fill audio gaps if no audio append start time', function() {
   var
     events = [],
@@ -2474,7 +2523,6 @@ QUnit.test('does not fill audio gap if no video base media decode time', functio
               'did not fill the gap');
 });
 
-
 QUnit.test('does not fill audio gaps greater than a half second', function() {
   var
     events = [],
@@ -2508,6 +2556,45 @@ QUnit.test('does not fill audio gaps greater than a half second', function() {
               'did not fill gap');
 });
 
+QUnit.test('does not fill audio gaps smaller than a frame duration', function() {
+  var
+    events = [],
+    boxes,
+    offsetSeconds = clock.videoTsToSeconds(111),
+    // audio gap (in seconds) small enough that it shouldn't be filled
+    audioGap = 0.001,
+    newVideoStart = 10,
+    oldAudioEnd = 10.3,
+    newAudioStart = oldAudioEnd + audioGap + offsetSeconds;
+
+  audioSegmentStream.on('data', function(event) {
+    events.push(event);
+  });
+
+  // the gap from the old audio end to the new audio start is tiny, but the gap between
+  // the new video start and the new audio start would be large enough to fill
+  audioSegmentStream.setAudioAppendStart(clock.secondsToVideoTs(oldAudioEnd));
+  audioSegmentStream.setVideoBaseMediaDecodeTime(clock.secondsToVideoTs(newVideoStart));
+
+  audioSegmentStream.push({
+    channelcount: 2,
+    samplerate: 90e3,
+    pts: clock.secondsToVideoTs(newAudioStart),
+    dts: clock.secondsToVideoTs(newAudioStart),
+    data: new Uint8Array([1])
+  });
+
+  audioSegmentStream.flush();
+
+  QUnit.equal(events.length, 1, 'a data event fired');
+  QUnit.equal(events[0].track.samples.length, 1, 'generated samples');
+  QUnit.equal(events[0].track.samples[0].size, 1, 'normal sample');
+  boxes = mp4.tools.inspect(events[0].boxes);
+  QUnit.equal(boxes[0].boxes[1].boxes[1].baseMediaDecodeTime,
+              clock.secondsToVideoTs(newAudioStart - offsetSeconds),
+              'did not fill gap');
+});
+
 QUnit.test('ensures baseMediaDecodeTime for audio is not negative', function() {
   var events = [], boxes;
 

From 6fa12609f2d0d990dad026f7841a41d9473266a7 Mon Sep 17 00:00:00 2001
From: Garrett Singer <gesinger@gmail.com>
Date: Wed, 1 Mar 2017 14:37:45 -0500
Subject: [PATCH 2/2] Remove unnecessary check and outdated comment in silence
 insertion

---
 lib/mp4/transmuxer.js | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/mp4/transmuxer.js b/lib/mp4/transmuxer.js
index 0eca7840..ca67233b 100644
--- a/lib/mp4/transmuxer.js
+++ b/lib/mp4/transmuxer.js
@@ -218,16 +218,12 @@ AudioSegmentStream = function(track) {
     // determine frame clock duration based on sample rate, round up to avoid overfills
     frameDuration = Math.ceil(ONE_SECOND_IN_TS / (track.samplerate / 1024));
 
-    if (audioAppendStartTs &&
-        videoBaseMediaDecodeTime &&
-        // gap is at least a frame duration
-        baseMediaDecodeTimeTs - audioAppendStartTs >= frameDuration) {
+    if (audioAppendStartTs && videoBaseMediaDecodeTime) {
       // insert the shortest possible amount (audio gap or audio to video gap)
       audioGapDuration =
         baseMediaDecodeTimeTs - Math.max(audioAppendStartTs, videoBaseMediaDecodeTime);
       // number of full frames in the audio gap
       audioFillFrameCount = Math.floor(audioGapDuration / frameDuration);
-      // ensure gap is a whole number of frames
       audioFillDuration = audioFillFrameCount * frameDuration;
     }