Media fixes (#409)

* Fix video aspect ratio and output size * Only allow 24fps worth of video into the queue --------- Co-authored-by: Rafał Leszko <[email protected]>
livepeer · Feb 5, 2025 · 6a9a95e · 6a9a95e
1 parent ff4143e
commit 6a9a95e
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 14 deletions.
diff --git a/runner/app/live/trickle/decoder.py b/runner/app/live/trickle/decoder.py
@@ -5,19 +5,15 @@
 
 from .frame import InputFrame
 
+MAX_FRAMERATE=24
+
 def decode_av(pipe_input, frame_callback, put_metadata):
     """
-    Reads from a pipe (or file-like object). If both audio and video
-    streams exist, for each decoded video frame, we gather all audio frames
-    whose PTS is <= the video frame's PTS, then call `frame_callback`.
-
-    Cases handled:
-      - No audio (video only).
-      - No video (audio only).
-      - Both audio and video.
+    Reads from a pipe (or file-like object).
 
     :param pipe_input: File path, 'pipe:', sys.stdin, or another file-like object.
-    :param frame_callback A function that accepts an InputFrame object
+    :param frame_callback: A function that accepts an InputFrame object
+    :param put_metadata: A function that accepts audio/video metadata
     """
     container = av.open(pipe_input)
 
@@ -52,8 +48,11 @@ def decode_av(pipe_input, frame_callback, put_metadata):
             "height": video_stream.codec_context.height,
             "pix_fmt": video_stream.codec_context.pix_fmt,
             "time_base": video_stream.time_base,
-            # usually unreliable, especially with webrtc
+            # framerate is usually unreliable, especially with webrtc
             "framerate": video_stream.codec_context.framerate,
+            "sar": video_stream.codec_context.sample_aspect_ratio,
+            "dar": video_stream.codec_context.display_aspect_ratio,
+            "format": str(video_stream.codec_context.format),
         }
 
     if video_metadata is None and audio_metadata is None:
@@ -66,6 +65,8 @@ def decode_av(pipe_input, frame_callback, put_metadata):
     put_metadata(metadata)
 
     reformatter = av.video.reformatter.VideoReformatter()
+    frame_interval = 1.0 / MAX_FRAMERATE
+    next_pts_time = 0.0
     try:
         for packet in container.demux():
             if packet.dts is None:
@@ -87,11 +88,24 @@ def decode_av(pipe_input, frame_callback, put_metadata):
                     if frame.pts is None:
                         continue
 
+                    # drop frames that come in too fast
+                    # TODO also check timing relative to wall clock
+                    pts_time = frame.time
+                    if pts_time < next_pts_time:
+                        # frame is too early, so drop it
+                        continue
+                    if pts_time > next_pts_time + frame_interval:
+                        # frame is delayed, so reset based on frame pts
+                        next_pts_time = pts_time + frame_interval
+                    else:
+                        # not delayed, so use prev pts to allow more jitter
+                        next_pts_time = next_pts_time + frame_interval
+
                     w = 512
-                    h = int((512 * frame.width / frame.height) / 2) * 2 # force divisible by 2
+                    h = int((512 * frame.height / frame.width) / 2) * 2 # force divisible by 2
                     if frame.height > frame.width:
                         h = 512
-                        w = int((512 * frame.height / frame.width) / 2) * 2
+                        w = int((512 * frame.width / frame.height) / 2) * 2
                     frame = reformatter.reformat(frame, format='rgba', width=w, height=h)
                     avframe = InputFrame.from_av_video(frame)
                     frame_callback(avframe)

diff --git a/runner/app/live/trickle/encoder.py b/runner/app/live/trickle/encoder.py
@@ -29,7 +29,7 @@ def encode_av(
     video_meta = decoded_metadata['video']
     audio_meta = decoded_metadata['audio']
 
-    logging.info(f"Encoder recevied metadata video={video_meta is None} audio={audio_meta is None}")
+    logging.info(f"Encoder recevied metadata video={video_meta is not None} audio={audio_meta is not None}")
 
     def custom_io_open(url: str, flags: int, options: dict):
         read_fd, write_fd = os.pipe()
@@ -47,7 +47,7 @@ def custom_io_open(url: str, flags: int, options: dict):
 
     if video_meta and video_codec:
         # Add a new stream to the output using the desired video codec
-        video_opts = { 'width':'512', 'height':'512', 'bf':'0' }
+        video_opts = { 'video_size':'512x512', 'bf':'0' }
         if video_codec == 'libx264':
             video_opts = video_opts | { 'preset':'superfast', 'tune':'zerolatency', 'forced-idr':'1' }
         output_video_stream = output_container.add_stream(video_codec, options=video_opts)