Skip to content

Commit

Permalink
Added combined 32vid stream format, ANS compression
Browse files Browse the repository at this point in the history
This format combines frames of audio, video and subtitles into a single stream.
This is now the default mode for 32vid output.
ANS compression was also added for faster decoding.
This is a WIP commit - some stuff is broken or missing.
  • Loading branch information
MCJack123 committed Jan 19, 2024
1 parent ed34c26 commit 63fe6fc
Show file tree
Hide file tree
Showing 8 changed files with 837 additions and 25 deletions.
151 changes: 151 additions & 0 deletions 32vid-player-mini.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
local bit32_band, bit32_lshift, bit32_rshift, math_frexp = bit32.band, bit32.lshift, bit32.rshift, math.frexp
--- Integer base-2 logarithm derived from the binary exponent of `n`.
-- math.frexp returns a mantissa in [0.5, 1) and an exponent e with
-- n == mantissa * 2^e, so e - 1 is floor(log2(n)) for positive n.
-- @param n number to take the logarithm of
-- @return the frexp exponent of n, minus one
local function log2(n)
    local _, exponent = math_frexp(n)
    return exponent - 1
end
local dfpwm = require "cc.audio.dfpwm"

-- Speaker used for audio frames; nil when no speaker peripheral is found.
local speaker = peripheral.find "speaker"
local file
-- First program argument: a local path or an http(s) URL.
local path = ...
-- Without this guard a missing argument fails on path:match with an opaque
-- "attempt to index a nil value" error.
if type(path) ~= "string" then
    error("Usage: 32vid-player-mini <file or URL>", 0)
end
if path:match "^https?://" then
    -- Third argument asks for a binary response so bytes are not re-encoded.
    file = assert(http.get(path, nil, true))
else
    file = assert(fs.open(shell.resolve(path), "rb"))
end

-- File header: 4-byte magic, then little-endian width (u16), height (u16),
-- fps (u8), stream count (u8) and flags (u16).
if file.read(4) ~= "32VD" then file.close() error("Not a 32Vid file") end
local width, height, fps, nstreams, flags = ("<HHBBH"):unpack(file.read(8))
print(width, height, fps, nstreams, flags)
-- Only a single stream is handled by this player.
if nstreams ~= 1 then file.close() error("Separate stream files not supported by this tool") end
-- Bit 0 of the flags must be set; video compression modes 0 and 2 are rejected here.
if bit32_band(flags, 1) == 0 then file.close() error("DEFLATE or no compression not supported by this tool") end
-- Stream header: a u32 that is ignored here, the frame count (u32) and the stream type (u8).
local _, nframes, ctype = ("<IIB"):unpack(file.read(9))
-- 0x0C is the only stream type this player accepts.
if ctype ~= 0x0C then file.close() error("Stream type not supported by this tool") end

--- Read a dictionary of 4-bit entries from the input file.
-- Entries are packed two per byte, high nibble first. One byte is consumed
-- for every two entries.
-- @param size number of entries to read
-- @return table of nibble values, indexed from 0
local function readDict(size)
    local dict = {}
    local idx = 0
    while idx <= size - 1 do
        local byte = file.read()
        dict[idx] = bit32_rshift(byte, 4)
        dict[idx + 1] = bit32_band(byte, 15)
        idx = idx + 2
    end
    return dict
end
-- Decoder entry points, assigned below for the supported video compression mode.
--   init(c)    reads a table header from `file`, builds the decoding table and
--              starts a fresh bit reader. `c` is true for colour planes
--              (24-entry dictionary) and false for the character plane
--              (32-entry dictionary).
--   read(nsym) decodes with the current table until at least `nsym` values
--              have been produced and returns them as a 1-indexed table.
local init, read
if bit32_band(flags, 3) == 1 then
    -- Video compression mode 1 (ANS).
    local decodingTable, X, readbits, isColor

    function init(c)
        isColor = c
        local R = file.read() -- log2 of the number of decoder states
        local L = 2^R
        local Ls = readDict(c and 24 or 32)
        -- Stored entries are 0 (symbol unused) or k, meaning 2^(k-1) states.
        -- Ls is 0-indexed, so #Ls is its last index rather than its entry count.
        local total = 0
        for i = 0, #Ls do
            Ls[i] = Ls[i] == 0 and 0 or 2^(Ls[i]-1)
            total = total + Ls[i]
        end
        -- The per-symbol state counts must cover the table exactly.
        assert(total == L, total)
        decodingTable = {R = R}
        -- Spread the symbols over the L states with a fixed stride.
        local x, step, nextState, symbol = 0, 0.625 * L + 3, {}, {}
        for i = 0, #Ls do
            nextState[i] = Ls[i]
            for _ = 1, Ls[i] do
                symbol[x] = i
                x = (x + step) % L
            end
        end
        -- For every state record its symbol (s), how many fresh bits to read
        -- (n) and the base of the following state (X).
        for state = 0, L - 1 do
            local s = symbol[state]
            local n = nextState[s]
            local t = {s = s, n = R - log2(n)}
            t.X = bit32_lshift(n, t.n) - L
            decodingTable[state] = t
            nextState[s] = n + 1
        end
        -- MSB-first bit reader over the file; state is reset on every init().
        local partial, bits = 0, 0
        function readbits(n)
            -- With no argument, consume the bits left over in the current byte.
            if not n then n = bits % 8 end
            if n == 0 then return 0 end
            while bits < n do
                bits, partial = bits + 8, bit32_lshift(partial, 8) + file.read()
            end
            local retval = bit32_band(bit32_rshift(partial, bits-n), 2^n-1)
            bits = bits - n
            return retval
        end
        -- The initial decoder state is stored in the first R bits.
        X = readbits(R)
    end

    function read(nsym)
        local retval = {}
        local i = 1
        local last = 0
        while i <= nsym do
            local t = decodingTable[X]
            if isColor and t.s >= 16 then
                -- Colour symbols 16 and up are runs: repeat the previous
                -- value 2^(s-15) times.
                local l = 2^(t.s - 15)
                for n = 0, l-1 do retval[i+n] = last end
                i = i + l
            else
                retval[i] = t.s
                last = t.s
                i = i + 1
            end
            X = t.X + readbits(t.n)
        end
        return retval
    end
else
    -- Flags with both low bits set pass the earlier bit-0 check but have no
    -- decoder here. Fail now instead of crashing later on a nil init().
    file.close()
    error("Custom (non-ANS) compression not supported by this tool")
end

-- Hex digit used by term.blit for each palette index 0-15.
local blitColors = {[0] = "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}
local start = os.epoch "utc"  -- playback start time, in milliseconds
local lastyield = start       -- time of the last explicit yield
local vframe = 0              -- index of the next video frame to show
local subs = {}               -- subtitle events received so far and not yet expired
-- A single DFPWM decoder is kept for the whole file so its state carries over
-- from one audio chunk to the next. dfpwm.decode() would start a fresh decoder
-- for every chunk.
-- NOTE(review): assumes the encoder keeps one DFPWM state across chunks - confirm.
local decodeDFPWM = dfpwm.make_decoder()
term.clear()
for _ = 1, nframes do
    -- Every frame starts with a 4-byte size and a 1-byte type code.
    local size, ftype = ("<IB"):unpack(file.read(5))
    if ftype == 0 then
        -- Video frame.
        -- Yield at least every 3 seconds; presumably this avoids the
        -- "too long without yielding" timeout.
        if os.epoch "utc" - lastyield > 3000 then sleep(0) lastyield = os.epoch "utc" end
        local dcstart = os.epoch "utc"
        init(false)
        local screen = read(width * height)
        -- Both colour planes are decoded with the same table.
        init(true)
        local bg = read(width * height)
        local fg = read(width * height)
        local dctime = os.epoch "utc" - dcstart
        -- Busy-wait until this frame's presentation time.
        while os.epoch "utc" < start + vframe * 1000 / fps do end
        for y = 0, height - 1 do
            -- Collect the row in tables and join once instead of growing
            -- three strings one character at a time.
            local text, fgs, bgs = {}, {}, {}
            for x = 1, width do
                local idx = y * width + x
                text[x] = string.char(128 + screen[idx])
                fgs[x] = blitColors[fg[idx]]
                bgs[x] = blitColors[bg[idx]]
            end
            term.setCursorPos(1, y + 1)
            term.blit(table.concat(text), table.concat(fgs), table.concat(bgs))
        end
        -- The frame ends with 16 palette entries stored as R, G, B bytes.
        for i = 0, 15 do term.setPaletteColor(2^i, file.read() / 255, file.read() / 255, file.read() / 255) end
        -- Draw the subtitles that are still active and collect the expired ones.
        local delete = {}
        for i, v in ipairs(subs) do
            if vframe <= v.frame + v.length then
                term.setCursorPos(v.x, v.y)
                term.setBackgroundColor(v.bgColor)
                term.setTextColor(v.fgColor)
                term.write(v.text)
            else delete[#delete+1] = i end
        end
        -- `delete` is in ascending order, so each earlier removal shifts the
        -- later indices down by one.
        for i, v in ipairs(delete) do table.remove(subs, v - i + 1) end
        term.setCursorPos(1, height + 1)
        term.clearLine()
        print("Frame decode time:", dctime, "ms")
        vframe = vframe + 1
    elseif ftype == 1 then
        -- Audio chunk. It is read even without a speaker so the stream stays in sync.
        local audio = file.read(size)
        if speaker then
            if bit32_band(flags, 12) == 0 then
                -- Uncompressed audio: shift each byte from 0..255 to -128..127.
                local chunk = {audio:byte(1, -1)}
                for i = 1, #chunk do chunk[i] = chunk[i] - 128 end
                speaker.playAudio(chunk)
            else
                speaker.playAudio(decodeDFPWM(audio))
            end
        end
    elseif ftype == 8 then
        -- Subtitle event. It is queued and drawn along with later video frames.
        local data = file.read(size)
        local sub = {}
        sub.frame, sub.length, sub.x, sub.y, sub.color, sub.flags, sub.text = ("<IIHHBBs2"):unpack(data)
        -- High nibble selects the background colour, low nibble the text colour.
        sub.bgColor, sub.fgColor = 2^bit32_rshift(sub.color, 4), 2^bit32_band(sub.color, 15)
        subs[#subs+1] = sub
    else file.close() error("Unknown frame type " .. ftype) end
end
-- Release the file or HTTP handle once playback has finished.
file.close()
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ ComputerCraft.
-opath, --output=path Output file path
-l, --lua Output a Lua script file (default for images; only does one frame)
-n, --nfp Output an NFP format image for use in paint (changes proportions!)
-r, --raw Output a rawmode-based image/video file (default for videos)
-b, --blit-image Output a blit image (BIMG) format image/animation file
-r, --raw Output a rawmode-based image/video file
-b, --blit-image Output a blit image (BIMG) format image/animation file (default for videos)
-3, --32vid Output a 32vid format binary video file with compression + audio
-sport, --http=port Serve an HTTP server that has each frame split up + a player program
-wport, --websocket=port Serve a WebSocket that sends the image/video with audio
Expand All @@ -84,8 +84,9 @@ ComputerCraft.
-L, --lab-color Use CIELAB color space for higher quality color conversion
-8, --octree Use octree for higher quality color conversion (slower)
-k, --kmeans Use k-means for highest quality color conversion (slowest)
-cmode, --compression=mode Compression type for 32vid videos; available modes: none|lzw|deflate|custom
-cmode, --compression=mode Compression type for 32vid videos; available modes: none|ans|deflate|custom
-B, --binary Output blit image files in a more-compressed binary format (requires opening the file in binary mode)
-S, --separate-streams Output 32vid files using separate streams (slower to decode)
-d, --dfpwm Use DFPWM compression on audio
-m, --mute Remove audio from output
-Wsize, --width=size Resize the image to the specified width
Expand All @@ -98,6 +99,7 @@ Custom palettes are specified as a list of 16 comma-separated 6-digit hex codes,

### Playback programs
* `32vid-player.lua` plays back 32vid video/audio files from the disk. Simply give it the file name and it will decode and play the file.
* `32vid-player-mini.lua` plays back a limited set of 32vid video/audio files (combined-stream files with ANS compression) from the disk or web in real time. Simply give it the file name or URL and it will decode and play the file.
* `bimg-player.lua` displays BIMG images or animations. Simply give it the file name and it will decode and play the file.
* `raw-player.lua` plays back raw video files from the disk. Simply give it the file name and it will decode and play the file.
* `websocket-player.lua` plays a stream from a sanjuuni WebSocket server. Simply give it the WebSocket URL and it will play the stream, with audio if a speaker is attached.
Expand Down Expand Up @@ -130,7 +132,7 @@ The 32vid format consists of a number of streams, which can hold video, audio, o
| 0x0C | *n* | List of streams |

**Flags:**
* Bits 0-1: Compression for video; 0 = none, 1 = LZW, 2 = DEFLATE, 3 = custom
* Bits 0-1: Compression for video; 0 = none, 1 = custom ANS, 2 = DEFLATE, 3 = custom
* Bits 2-3: Compression for audio; 0 = none (PCM), 1 = DFPWM
* Bit 4: Always set to 1

Expand All @@ -150,6 +152,7 @@ The 32vid format consists of a number of streams, which can hold video, audio, o
* 4-7: Additional audio channels if desired
* 8: Primary subtitles
* 9-11: Alternate subtitles if desired
* 12: Combo data stream

#### Video data
Video data is stored as a list of frames, with each frame consisting of packed pixel data, then colors. Pixel data is stored in a bitstream as 5-bit codes. The low 5 bits correspond to the low 5 bits in the drawing characters, and the character can be derived by adding 128 to the value.
Expand All @@ -165,6 +168,9 @@ The code tree is encoded using canonical Huffman codes, with 4 bits per symbol f

Unlike uncompressed frames, the color block is stored in two sections: the foreground colors are coded first, and then the background colors. This is to allow better run-length encoding. Each frame is compressed separately as well, as opposed to LZW and DEFLATE compression.

#### Custom video compression - ANS
This is a variant of the normal custom format, but uses asymmetric numeral systems (ANS) to encode the data. This compresses similarly to the Huffman coding used in normal custom compression, but is easier to decode as it doesn't need to step through each bit (all bits for a symbol are read at once). The image is also encoded backwards, as required by the ANS algorithm.

#### Subtitle events
| Offset | Bytes | Description |
|--------|-------|--------------------------------------|
Expand All @@ -179,6 +185,9 @@ Unlike uncompressed frames, the color block is stored in two sections: the foreg

Subtitle streams are arranged as sequences of subtitle events. Events MUST be ordered by start time; decoders are not required to respect events that are out of order.

#### Combined audio/video streams
This stream format is used to encode audio and video together, which allows real-time decoding of video. It's split into frames of video, audio, and subtitles, which are each prefixed with a 4 byte size, and a 1 byte type code using the stream type codes. A frame only contains a single event of its type: video is one single frame, subtitles are one single event, and audio is a single chunk (which can be any length, ideally around 0.5s). The length field of the stream is used to store the number of frames in the stream. If the file contains a combined stream, it SHOULD NOT contain any other type of stream.

## Library usage
It's possible to use much of the core of sanjuuni as a library for other programs. To do this, simply include all files but `sanjuuni.cpp` in your program, and include `sanjuuni.hpp` in the source you want to use sanjuuni in. Then create a global `WorkQueue work` variable in your source, which is used to delegate tasks to threads. Then use any of the functions in `sanjuuni.hpp` as you need. Basic documentation is available in the header.

Expand Down
72 changes: 72 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -3997,6 +3997,18 @@ else $as_nop
as_fn_error $? "Could not find libavdevice headers." "$LINENO" 5
fi

done
for ac_header in libavfilter/avfilter.h
do :
ac_fn_cxx_check_header_compile "$LINENO" "libavfilter/avfilter.h" "ac_cv_header_libavfilter_avfilter_h" "$ac_includes_default"
if test "x$ac_cv_header_libavfilter_avfilter_h" = xyes
then :
printf "%s\n" "#define HAVE_LIBAVFILTER_AVFILTER_H 1" >>confdefs.h

else $as_nop
as_fn_error $? "Could not find libavfilter headers." "$LINENO" 5
fi

done
for ac_header in libswscale/swscale.h
do :
Expand Down Expand Up @@ -4287,6 +4299,66 @@ else $as_nop
as_fn_error $? "Could not find libavdevice library." "$LINENO" 5
fi

{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing avfilter_version" >&5
printf %s "checking for library containing avfilter_version... " >&6; }
if test ${ac_cv_search_avfilter_version+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_func_search_save_LIBS=$LIBS
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
namespace conftest {
extern "C" int avfilter_version ();
}
int
main (void)
{
return conftest::avfilter_version ();
;
return 0;
}
_ACEOF
for ac_lib in '' avfilter
do
if test -z "$ac_lib"; then
ac_res="none required"
else
ac_res=-l$ac_lib
LIBS="-l$ac_lib $ac_func_search_save_LIBS"
fi
if ac_fn_cxx_try_link "$LINENO"
then :
ac_cv_search_avfilter_version=$ac_res
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext
if test ${ac_cv_search_avfilter_version+y}
then :
break
fi
done
if test ${ac_cv_search_avfilter_version+y}
then :

else $as_nop
ac_cv_search_avfilter_version=no
fi
rm conftest.$ac_ext
LIBS=$ac_func_search_save_LIBS
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_avfilter_version" >&5
printf "%s\n" "$ac_cv_search_avfilter_version" >&6; }
ac_res=$ac_cv_search_avfilter_version
if test "$ac_res" != no
then :
test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"

else $as_nop
as_fn_error $? "Could not find libavfilter library." "$LINENO" 5
fi

{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing avformat_open_input" >&5
printf %s "checking for library containing avformat_open_input... " >&6; }
if test ${ac_cv_search_avformat_open_input+y}
Expand Down
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ AX_CHECK_COMPILE_FLAG([-std=c++11], [], [AC_MSG_ERROR([C++ compiler does not sup
AC_CHECK_HEADERS([libavformat/avformat.h], [], [AC_MSG_ERROR([Could not find libavformat headers.])])
AC_CHECK_HEADERS([libavcodec/avcodec.h], [], [AC_MSG_ERROR([Could not find libavcodec headers.])])
AC_CHECK_HEADERS([libavdevice/avdevice.h], [], [AC_MSG_ERROR([Could not find libavdevice headers.])])
AC_CHECK_HEADERS([libavfilter/avfilter.h], [], [AC_MSG_ERROR([Could not find libavfilter headers.])])
AC_CHECK_HEADERS([libswscale/swscale.h], [], [AC_MSG_ERROR([Could not find libswscale headers.])])
AC_CHECK_HEADERS([libswresample/swresample.h], [], [AC_MSG_ERROR([Could not find libswresample headers.])])
AC_CHECK_HEADERS([zlib.h], [], [AC_MSG_ERROR([Could not find zlib headers.])])
Expand All @@ -25,6 +26,7 @@ AC_CHECK_HEADERS([CL/opencl.hpp], [], [HAS_OPENCL=0])
AC_SEARCH_LIBS(pthread_create, pthread, [], [AC_MSG_ERROR([Could not find pthread library.])])
AC_SEARCH_LIBS(avcodec_find_decoder, avcodec, [], [AC_MSG_ERROR([Could not find libavcodec library.])])
AC_SEARCH_LIBS(avdevice_register_all, avdevice, [], [AC_MSG_ERROR([Could not find libavdevice library.])])
AC_SEARCH_LIBS(avfilter_version, avfilter, [], [AC_MSG_ERROR([Could not find libavfilter library.])])
AC_SEARCH_LIBS(avformat_open_input, avformat, [], [AC_MSG_ERROR([Could not find libavformat library.])])
AC_SEARCH_LIBS(av_frame_alloc, avutil, [], [AC_MSG_ERROR([Could not find libavutil library.])])
AC_SEARCH_LIBS(sws_getContext, swscale, [], [AC_MSG_ERROR([Could not find libswscale library.])])
Expand Down
Loading

0 comments on commit 63fe6fc

Please sign in to comment.