From 0287c18a0d7b5244eab93aa35ca8d71ddcf10f22 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Sun, 25 Dec 2022 14:21:31 -0500 Subject: [PATCH] Reject all unassigned and invalid Unicode codepoints This required build system changes, as the resulting generated table really needs to be in a separate file. --- qrexec-lib/Makefile | 15 ++++++++++--- gentbl.py => qrexec-lib/gentbl.py | 4 +--- qrexec-lib/unpack.c | 37 +------------------------------ 3 files changed, 14 insertions(+), 42 deletions(-) rename gentbl.py => qrexec-lib/gentbl.py (90%) diff --git a/qrexec-lib/Makefile b/qrexec-lib/Makefile index de395713..5053fe6f 100644 --- a/qrexec-lib/Makefile +++ b/qrexec-lib/Makefile @@ -2,12 +2,20 @@ CC=gcc CFLAGS+=-I. -g -O2 -Wall -Wextra -Werror -pie -fPIC SO_VER=2 LDFLAGS+=-shared +.PHONY: all clean install +objs := ioall.o copy-file.o crc32.o unpack.o pack.o all: libqubes-rpc-filecopy.so.$(SO_VER) -libqubes-rpc-filecopy.so.$(SO_VER): ioall.o copy-file.o crc32.o unpack.o pack.o - $(CC) $(LDFLAGS) -Wl,-soname,$@ -o $@ $^ +libqubes-rpc-filecopy.so.$(SO_VER): $(objs) + $(CC) $(LDFLAGS) -Wl,-soname,$@ -o $@ $^ $(LDLIBS) -%.a: +%.o: %.c + $(CC) $(CFLAGS) -MD -MP -MF $@.dep -c -o $@ $< +unpack.o: unpack-table.c +unpack-table.c: gentbl.py + python3 gentbl.py > unpack-table.c + +%.a: $(objs) $(AR) rcs $@ $^ clean: rm -f *.o *~ *.a *.so.* @@ -18,3 +26,4 @@ install: ln -s libqubes-rpc-filecopy.so.$(SO_VER) $(DESTDIR)$(LIBDIR)/libqubes-rpc-filecopy.so mkdir -p $(DESTDIR)$(INCLUDEDIR) cp libqubes-rpc-filecopy.h $(DESTDIR)$(INCLUDEDIR) +-include ./*.o.dep diff --git a/gentbl.py b/qrexec-lib/gentbl.py similarity index 90% rename from gentbl.py rename to qrexec-lib/gentbl.py index 96194d70..ba3e227a 100755 --- a/gentbl.py +++ b/qrexec-lib/gentbl.py @@ -2,8 +2,6 @@ import sys def main(): def print_interval(interval, last_cat): - if last_cat == 'Cn': - return if interval[0] != interval[1]: print(f' case 0x{interval[0]:X} ... 0x{interval[1]:X}: // category {last_cat}' .replace('category Cs', 'surrogates')) @@ -25,7 +23,7 @@ def print_interval(interval, last_cat): print_interval(interval, last_cat) interval = [i, i] print_interval(interval, last_cat) - print(' case 0x10FFFF ... UINT32_MAX: // too large') + print(' case 0x110000 ... UINT32_MAX: // too large') sys.stdout.flush() if __name__ == '__main__': main() diff --git a/qrexec-lib/unpack.c b/qrexec-lib/unpack.c index 8d5bfd5e..1fc666b6 100644 --- a/qrexec-lib/unpack.c +++ b/qrexec-lib/unpack.c @@ -242,43 +242,8 @@ static int validate_utf8_char(const unsigned char *untrusted_c) { } switch (code_point) { - case 0x0 ... 0x1F: // category Cc - case 0x7F ... 0x9F: // category Cc - case 0xA0: // category Zs - case 0xAD: // category Cf - case 0x600 ... 0x605: // category Cf - case 0x61C: // category Cf - case 0x6DD: // category Cf - case 0x70F: // category Cf - case 0x8E2: // category Cf - case 0x1680: // category Zs - case 0x180E: // category Cf - case 0x2000 ... 0x200A: // category Zs - case 0x200B ... 0x200F: // category Cf - case 0x2028: // category Zl - case 0x2029: // category Zp - case 0x202A ... 0x202E: // category Cf - case 0x202F: // category Zs - case 0x205F: // category Zs - case 0x2060 ... 0x2064: // category Cf - case 0x2066 ... 0x206F: // category Cf - case 0x3000: // category Zs - case 0xD800 ... 0xDFFF: // surrogates - case 0xE000 ... 0xF8FF: // category Co - case 0xFEFF: // category Cf - case 0xFFF9 ... 0xFFFB: // category Cf - case 0x110BD: // category Cf - case 0x110CD: // category Cf - case 0x13430 ... 0x13438: // category Cf - case 0x1BCA0 ... 0x1BCA3: // category Cf - case 0x1D173 ... 0x1D17A: // category Cf - case 0xE0001: // category Cf - case 0xE0020 ... 0xE007F: // category Cf - case 0xF0000 ... 0xFFFFD: // category Co - case 0x100000 ... 0x10FFFD: // category Co - case 0x110000 ... UINT32_MAX: // too large +#include "unpack-table.c" return 0; // Invalid UTF-8 or forbidden codepoint - default: return total_size; }