From e501ca6ac2ecdc43452fe8596692f054801dcb2d Mon Sep 17 00:00:00 2001
From: MCJack123 <jackmacwindowslinux@gmail.com>
Date: Tue, 2 Apr 2024 16:45:55 -0400
Subject: [PATCH] Some fixes

- Lua files now properly support animation
- Lua files are now the default for all file types
- FPS is now guessed if the file is variable framerate (may cause some
frame skipping!)
- Variable framerate files no longer cause an error (fixes all animated GIFs
failing to convert)
- 32vid-player-mini now properly clears the screen when done
- websocket-player: parse counts as base 10, receive without a timeout, and
skip frames that fail to load (these changes may or may not break things)
---
 32vid-player-mini.lua |  8 +++++++-
 README.md             | 17 +++++++++++++++--
 src/generator.cpp     |  5 +++--
 src/sanjuuni.cpp      | 28 +++++++++++++++++++++-------
 websocket-player.lua  | 22 ++++++++++++----------
 5 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/32vid-player-mini.lua b/32vid-player-mini.lua
index 4606d72..e5d17f9 100644
--- a/32vid-player-mini.lua
+++ b/32vid-player-mini.lua
@@ -90,7 +90,7 @@ if bit32_band(flags, 3) == 1 then
         return retval
     end
 else
-
+    error("Unimplemented!")
 end
 
 local blitColors = {[0] = "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}
@@ -165,3 +165,9 @@ for _ = 1, nframes do
         subs[#subs+1] = sub
     else file.close() error("Unknown frame type " .. ftype) end
 end
+
+for i = 0, 15 do term.setPaletteColor(2^i, term.nativePaletteColor(2^i)) end
+term.setBackgroundColor(colors.black)
+term.setTextColor(colors.white)
+term.setCursorPos(1, 1)
+term.clear()
diff --git a/README.md b/README.md
index 9a9535a..20a9a4b 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ Custom palettes are specified as a list of 16 comma-separated 6-digit hex codes,
 * `websocket-player.lua` plays a stream from a sanjuuni WebSocket server. Simply give it the WebSocket URL and it will play the stream, with audio if a speaker is attached.
 
 ## Formats
-* The Lua file output creates a simple script that displays the image and waits for the return key to be pressed. The data is stored as a plain blit table, so it can be copied to another file as desired with the display code.
+* The Lua file output creates a simple script that displays the image/animation, and if there's only one frame, waits for the return key to be pressed. The data is stored as plain blit table(s), so it can be copied to another file as desired with the display code.
 * The raw file output creates a file based on [CraftOS-PC Raw Mode](https://www.craftos-pc.cc/docs/rawmode). Each frame packet is stored on one line, and the first two lines contain a version header and the FPS. (If the FPS is 0, this is a plain image.)
 * The blit image file output creates a file based on [the BIMG specification](https://github.com/SkyTheCodeMaster/bimg). This is similar to Lua output, but stored in a serialized table for reading by other files. It also supports animations, but no audio.
 * The 32vid file output creates a file that uses the 32vid format described below, which is a binary file that stores compressed and optimized versions of the video and audio data in multiple streams.
@@ -153,6 +153,14 @@ The 32vid format consists of a number of streams, which can hold video, audio, o
 * 8: Primary subtitles
 * 9-11: Alternate subtitles if desired
 * 12: Combo data stream
+* 13: Combo stream indexes
+* 64-127: Multi-monitor video
+  * The stream type code is a bitfield describing monitor placement:
+    * 2 bits (`01`) for chunk type
+    * 3 bits for monitor X (0-7)
+    * 3 bits for monitor Y (0-7)
+
+As of sanjuuni 0.5, the old-style format with separate streams is deprecated. It remains available for legacy applications, but new features will only be supported in the combo stream format.
 
 #### Video data
 Video data is stored as a list of frames, with each frame consisting of packed pixel data, then colors. Pixel data is stored in a bitstream as 5-bit codes. The low 5 bits correspond to the low 5 bits in the drawing characters, and the character can be derived by adding 128 to the value.
@@ -186,7 +194,12 @@ This is a variant of the normal custom format, but uses asymmetrical numeral sys
 Subtitle streams are arranged as sequences of subtitle events. Events MUST be ordered by start time; decoders are not required to respect events that are out of order.
 
 #### Combined audio/video streams
-This stream format is used to encode audio and video together, which allows real-time decoding of video. It's split into frames of video, audio, and subtitles, which are each prefixed with a 4 byte size, and a 1 byte type code using the stream type codes. A frame only contains a single event of its type: video is one single frame, subtitles are one single event, and audio is a single chunk (which can be any length, ideally around 0.5s). The length field of the stream is used to store the number of frames in the stream. If the file contains a combined stream, it SHOULD NOT contain any other type of stream.
+This stream format is used to encode audio and video together, which allows real-time decoding of video. It's split into frames of video, audio, and subtitles, which are each prefixed with a 4 byte size, and a 1 byte type code using the stream type codes. A frame only contains a single event of its type: video is one single frame, subtitles are one single event, and audio is a single chunk (which can be any length, ideally around 0.5s). The length field of the stream is used to store the number of frames in the stream. If the file contains a combined stream, it SHOULD NOT contain any other type of stream, except an index if available.
+
+#### Combined stream index table
+This chunk type stores an index table for the audio/video stream, which can speed up seeking in the file. It begins with a single byte giving the number of video frames per entry; the rest of the chunk is a list of 32-bit words, where each word is the offset into the file of the start of the video frame it indexes. For example, if the first byte is 60, the first word will point to video frame 0, the next will point to frame 60, then frame 120, and so on. Note that this counts video frames specifically - the index will never point to an audio or subtitle frame.
+
+Index tables MAY be stored at either the beginning or end of the file. If your application is looking for an index table and it is not the first stream, seek past the combo stream and check whether an index table follows it. sanjuuni stores the index at the end for efficiency.
 
 ## Library usage
 It's possible to use much of the core of sanjuuni as a library for other programs. To do this, simply include all files but `sanjuuni.cpp` in your program, and include `sanjuuni.hpp` in the source you want to use sanjuuni in. Then create a global `WorkQueue work` variable in your source, which is used to delegate tasks to threads. Then use any of the functions in `sanjuuni.hpp` as you need. Basic documentation is available in the header.
diff --git a/src/generator.cpp b/src/generator.cpp
index 8df0af3..62aa3a1 100644
--- a/src/generator.cpp
+++ b/src/generator.cpp
@@ -19,6 +19,7 @@
  */
 
 #include "sanjuuni.hpp"
+#include <cstring>
 #include <algorithm>
 #include <sstream>
 #include <stack>
@@ -292,7 +293,7 @@ struct compare_node {bool operator()(tree_node *a, tree_node *b) {
 
 std::string make32vid_cmp(const uchar * characters, const uchar * colors, const std::vector<Vec3b>& palette, int width, int height) {
     std::string screen, col, pal;
-    tree_node screen_nodes[32] = {0}, color_nodes[24] = {0}; // color codes 16-23 = repeat last color 2^(n-15) times
+    tree_node screen_nodes[32] = {}, color_nodes[24] = {}; // color codes 16-23 = repeat last color 2^(n-15) times
     tree_node internal[31];
     tree_node * internal_next = internal;
     uchar * fgcolors = new uchar[width*height];
@@ -836,5 +837,5 @@ std::string make32vid_ans(const uchar * characters, const uchar * colors, const
 }
 
 std::string makeLuaFile(const uchar * characters, const uchar * colors, const std::vector<Vec3b>& palette, int width, int height) {
-    return "-- Generated with sanjuuni\n-- https://sanjuuni.madefor.cc\ndo local image, palette = " + makeTable(characters, colors, palette, width, height) + "\n\nterm.clear()\nfor i = 0, #palette do term.setPaletteColor(2^i, table.unpack(palette[i])) end\nfor y, r in ipairs(image) do\n    term.setCursorPos(1, y)\n    term.blit(table.unpack(r))\nend\nread()\nfor i = 0, 15 do term.setPaletteColor(2^i, term.nativePaletteColor(2^i)) end\nterm.setBackgroundColor(colors.black)\nterm.setTextColor(colors.white)\nterm.setCursorPos(1, 1)\nterm.clear() end\n";
+    return "-- Generated with sanjuuni\n-- https://sanjuuni.madefor.cc\ndo\nlocal image, palette = " + makeTable(characters, colors, palette, width, height) + "\n\nterm.clear()\nfor i = 0, #palette do term.setPaletteColor(2^i, table.unpack(palette[i])) end\nfor y, r in ipairs(image) do\n    term.setCursorPos(1, y)\n    term.blit(table.unpack(r))\nend\nend\n";
 }
diff --git a/src/sanjuuni.cpp b/src/sanjuuni.cpp
index 47fbfe2..87d8f13 100644
--- a/src/sanjuuni.cpp
+++ b/src/sanjuuni.cpp
@@ -492,10 +492,10 @@ int main(int argc, const char * argv[]) {
     options.addOption(Option("subtitle", "S", "ASS-formatted subtitle file to add to the video", false, "file", true));
     options.addOption(Option("format", "f", "Force a format to use for the input file", false, "format", true));
     options.addOption(Option("output", "o", "Output file path", false, "path", true));
-    options.addOption(Option("lua", "l", "Output a Lua script file (default for images; only does one frame)"));
+    options.addOption(Option("lua", "l", "Output a Lua script file (default)"));
     options.addOption(Option("nfp", "n", "Output an NFP format image for use in paint (changes proportions!)"));
     options.addOption(Option("raw", "r", "Output a rawmode-based image/video file"));
-    options.addOption(Option("blit-image", "b", "Output a blit image (BIMG) format image/animation file (default for videos)"));
+    options.addOption(Option("blit-image", "b", "Output a blit image (BIMG) format image/animation file"));
     options.addOption(Option("32vid", "3", "Output a 32vid format binary video file with compression + audio"));
     options.addOption(Option("http", "s", "Serve an HTTP server that has each frame split up + a player program", false, "port", true).validator(new IntValidator(1, 65535)));
     options.addOption(Option("websocket", "w", "Serve a WebSocket that sends the image/video with audio", false, "port", true).validator(new IntValidator(1, 65535)));
@@ -686,7 +686,7 @@ int main(int argc, const char * argv[]) {
         avformat_close_input(&format_ctx);
         return error;
     }
-    if (mode == OutputType::Default) mode = format_ctx->streams[video_stream]->nb_frames > 0 && !monitorWidth ? OutputType::BlitImage : OutputType::Lua;
+    if (mode == OutputType::Default) mode = OutputType::Lua;
     if (mode == OutputType::Vid32 && !separateStreams) {
         if (!(filter_graph = avfilter_graph_alloc())) {
             std::cerr << "Could not allocate filter graph\n";
@@ -922,6 +922,7 @@ int main(int argc, const char * argv[]) {
     auto start = system_clock::now();
     auto lastUpdate = system_clock::now() - seconds(1);
     bool first = true;
+    int64_t totalDuration = 0;
 #ifndef NO_NET
     if (mode == OutputType::HTTP) {
         srv = new HTTPServer(new HTTPListener::Factory(&fps), port);
@@ -1019,12 +1020,12 @@ int main(int argc, const char * argv[]) {
     while (av_read_frame(format_ctx, packet) >= 0) {
         if (packet->stream_index == video_stream) {
             avcodec_send_packet(video_codec_ctx, packet);
-            fps = (double)video_codec_ctx->framerate.num / (double)video_codec_ctx->framerate.den;
-            if (fps < 1) {
+            fps = av_q2d(video_codec_ctx->framerate);
+            /*if (fps < 1 && format_ctx->streams[video_stream]->nb_frames > 1) {
                 std::cerr << "Variable framerate files are not supported.\n";
                 av_packet_unref(packet);
                 goto cleanup;
-            }
+            }*/
             if (first) {
                 if (!subtitle.empty()) subtitles = parseASSSubtitles(subtitle, fps);
                 if (mode == OutputType::Raw) outstream << "32Vid 1.1\n" << fps << "\n";
@@ -1043,6 +1044,7 @@ int main(int argc, const char * argv[]) {
 #endif
                     lastUpdate = now;
                 } else nframe++;
+                totalDuration += frame->duration;
                 if (resize_ctx == NULL) {
                     if (width != -1 || height != -1) {
                         width = width == -1 ? height * ((double)frame->width / (double)frame->height) : width;
@@ -1107,7 +1109,7 @@ int main(int argc, const char * argv[]) {
                     convertImage(rs, &characters, &colors, palette, w, h, nframe);
                     switch (mode) {
                     case OutputType::Lua: {
-                        outstream << makeLuaFile(characters, colors, palette, w / 2, h / 3);
+                        outstream << makeLuaFile(characters, colors, palette, w / 2, h / 3) << "sleep(" << (frame->duration * av_q2d(format_ctx->streams[video_stream]->time_base)) << ")\n";
                         outstream.flush();
                         break;
                     } case OutputType::NFP: {
@@ -1264,6 +1266,15 @@ int main(int argc, const char * argv[]) {
         if (externalStop) break;
 #endif
     }
+    if (fps < 1) {
+        fps = nframe / (totalDuration * av_q2d(format_ctx->streams[video_stream]->time_base));
+        if (mode == OutputType::Vid32 && !separateStreams) {
+            auto pos = outstream.tellp();
+            outstream.seekp(8, std::ios::beg);
+            outstream.put(floor(fps + 0.5));
+            outstream.seekp(pos, std::ios::beg);
+        }
+    }
     if (mode == OutputType::Vid32 && separateStreams) {
         Vid32Chunk videoChunk, audioChunk;
         Vid32Header header;
@@ -1338,6 +1349,9 @@ int main(int argc, const char * argv[]) {
         }
         if (binary) outfile << "creator='sanjuuni',version='1.0.0',secondsPerFrame=" << (1.0 / fps) << ",animation=" << (nframe > 1 ? "true" : "false") << ",date='" << timestr << "',title='" << input << "'}";
         else outfile << "creator = 'sanjuuni',\nversion = '1.0.0',\nsecondsPerFrame = " << (1.0 / fps) << ",\nanimation = " << (nframe > 1 ? "true" : "false") << ",\ndate = '" << timestr << "',\ntitle = '" << input << "'\n}\n";
+    } else if (mode == OutputType::Lua) {
+        if (nframe == 1) outfile << "read()\n";
+        outfile << "for i = 0, 15 do term.setPaletteColor(2^i, term.nativePaletteColor(2^i)) end\nterm.setBackgroundColor(colors.black)\nterm.setTextColor(colors.white)\nterm.setCursorPos(1, 1)\nterm.clear()\n";
     }
 cleanup:
     auto t = system_clock::now() - start;
diff --git a/websocket-player.lua b/websocket-player.lua
index 8e27f3b..27791a5 100644
--- a/websocket-player.lua
+++ b/websocket-player.lua
@@ -1,9 +1,9 @@
 local ws, err = http.websocket(...)
 if not ws then error("Could not connect to WebSocket server: " .. err) end
 ws.send("n")
-local nFrames = tonumber(ws.receive(), nil)
+local nFrames = tonumber(ws.receive(), 10)
 ws.send("f")
-local fps = tonumber(ws.receive(), nil)
+local fps = tonumber(ws.receive(), 10)
 local speaker = peripheral.find "speaker"
 term.clear()
 local lock = false
@@ -13,15 +13,17 @@ parallel.waitForAll(function()
         while lock do os.pullEvent() end
         lock = true
         ws.send("v" .. f)
-        local frame, ok = ws.receive(1)
-        while #frame % 65535 == 0 do frame = frame .. ws.receive(1) end
+        local frame, ok = ws.receive()
+        while #frame % 65535 == 0 do frame = frame .. ws.receive() end
         lock = false
-        if not ok then break end
-        local image, palette = assert(load(frame, "=frame", "t", {}))()
-        for i = 0, #palette do term.setPaletteColor(2^i, table.unpack(palette[i])) end
-        for y, r in ipairs(image) do
-            term.setCursorPos(1, y)
-            term.blit(table.unpack(r))
+        --if not ok then break end
+        if load(frame) then
+            local image, palette = assert(load(frame, "=frame", "t", {}))()
+            for i = 0, #palette do term.setPaletteColor(2^i, table.unpack(palette[i])) end
+            for y, r in ipairs(image) do
+                term.setCursorPos(1, y)
+                term.blit(table.unpack(r))
+            end
         end
         while os.epoch "utc" < start + (f + 1) / fps * 1000 do sleep(1 / fps) end
     end