diff --git a/Makefile b/Makefile index eff6064..f6f1c65 100644 --- a/Makefile +++ b/Makefile @@ -19,77 +19,36 @@ # # -RPMS_DIR=rpm/ -VERSION := $(shell cat version) +PANDOC=pandoc -s -f markdown -t man +NAME := convert-pdf -help: - @echo "Qubes addons main Makefile:" ;\ - echo "make rpms <--- make rpms and sign them";\ - echo; \ - echo "make clean <--- clean all the binary files";\ - echo "make update-repo-current <-- copy newly generated rpms to qubes yum repo";\ - echo "make update-repo-current-testing <-- same, but for -current-testing repo";\ - echo "make update-repo-unstable <-- same, but to -testing repo";\ - echo "make update-repo-installer -- copy dom0 rpms to installer repo" - @exit 0; - -rpms: rpms-vm - -rpms-dom0: - rpmbuild --define "_rpmdir rpm/" -bb rpm_spec/qpdf-converter-dom0.spec - rpm --addsign rpm/x86_64/qubes-pdf-converter-dom0*$(VERSION)*.rpm - -rpms-vm: - rpmbuild --define "_rpmdir rpm/" -bb rpm_spec/qpdf-converter.spec - rpm --addsign rpm/x86_64/qubes-pdf-converter*$(VERSION)*.rpm - -update-repo-current: - for vmrepo in ../yum/current-release/current/vm/* ; do \ - dist=$$(basename $$vmrepo) ;\ - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter*$(VERSION)*$$dist*.rpm $$vmrepo/rpm/ ;\ - done - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter-dom0-*$(VERSION)*.rpm ../yum/current-release/current/dom0/rpm/ - -update-repo-current-testing: - for vmrepo in ../yum/current-release/current-testing/vm/* ; do \ - dist=$$(basename $$vmrepo) ;\ - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter*$(VERSION)*$$dist*.rpm $$vmrepo/rpm/ ;\ - done - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter-dom0-*$(VERSION)*.rpm ../yum/current-release/current-testing/dom0/rpm/ - -update-repo-unstable: - for vmrepo in ../yum/current-release/unstable/vm/* ; do \ - dist=$$(basename $$vmrepo) ;\ - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter*$(VERSION)*$$dist*.rpm $$vmrepo/rpm/ ;\ - done - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter-dom0-*$(VERSION)*.rpm 
../yum/current-release/unstable/dom0/rpm/ - -update-repo-template: - for vmrepo in ../template-builder/yum_repo_qubes/* ; do \ - dist=$$(basename $$vmrepo) ;\ - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter*$(VERSION)*$$dist*.rpm $$vmrepo/rpm/ ;\ - done - -update-repo-installer: - ln -f $(RPMS_DIR)/x86_64/qubes-pdf-converter-dom0-*$(VERSION)*.rpm ../installer/yum/qubes-dom0/rpm/ - -build: - make manpages -C doc - -install-vm: - make install -C doc - install -D qvm-convert-pdf $(DESTDIR)/usr/bin/qvm-convert-pdf - install -D qpdf-convert-client $(DESTDIR)/usr/lib/qubes/qpdf-convert-client - install -D qpdf-convert-server $(DESTDIR)/usr/lib/qubes/qpdf-convert-server +install-vm: build + install -d $(DESTDIR)/usr/share/man/man1 + install -D qvm-$(NAME).1.gz $(DESTDIR)/usr/share/man/man1/ + python3 setup.py install -O1 $(PYTHON_PREFIX_ARG) --root $(DESTDIR) install -d $(DESTDIR)/etc/qubes-rpc ln -s ../../usr/lib/qubes/qpdf-convert-server $(DESTDIR)/etc/qubes-rpc/qubes.PdfConvert install -D qvm-convert-pdf.gnome $(DESTDIR)/usr/lib/qubes/qvm-convert-pdf.gnome install -d $(DESTDIR)/usr/share/nautilus-python/extensions - install -m 0755 qvm_convert_pdf_nautilus.py $(DESTDIR)/usr/share/nautilus-python/extensions + install -m 0644 qvm_convert_pdf_nautilus.py $(DESTDIR)/usr/share/nautilus-python/extensions install -d $(DESTDIR)/usr/share/kde4/services install -m 0644 qvm-convert-pdf.desktop $(DESTDIR)/usr/share/kde4/services install-dom0: python3 setup.py install -O1 --root $(DESTDIR) + # not needed in dom0 + rm -f $(DESTDIR)/usr/bin/qvm-convert-pdf + rm -f $(DESTDIR)/usr/lib/qubes/qpdf-convert-server + +qvm-$(NAME).1: README.md + $(PANDOC) $< > $@ + +qvm-$(NAME).1.gz: qvm-$(NAME).1 + gzip -f $< + +build: qvm-$(NAME).1.gz clean: + rm -rf debian/changelog.* + rm -rf pkgs + rm -f qvm-$(NAME).1.gz diff --git a/Makefile.builder b/Makefile.builder index 1ca07dd..6a84a6c 100644 --- a/Makefile.builder +++ b/Makefile.builder @@ -1,12 +1,12 @@ ifeq ($(PACKAGE_SET),dom0) RPM_SPEC_FILES 
:= rpm_spec/qpdf-converter-dom0.spec else ifeq ($(PACKAGE_SET),vm) - ifneq ($(filter $(DISTRIBUTION), debian qubuntu),) + # needs python 3.7+ - exclude stretch, jessie, and centos[78] + ifeq ($(filter $(DIST), stretch jessie centos7 centos8),) DEBIAN_BUILD_DIRS := debian + RPM_SPEC_FILES := rpm_spec/qpdf-converter.spec + ARCH_BUILD_DIRS := archlinux endif - - RPM_SPEC_FILES := rpm_spec/qpdf-converter.spec - ARCH_BUILD_DIRS := archlinux endif # vim: filetype=make diff --git a/README.md b/README.md index 0e5b57b..8faa073 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ qvm-convert-pdf - converts a potentially untrusted file to a safe-to-view pdf SYNOPSIS ----------- ```bash -qvm-convert-pdf +qvm-convert-pdf ... ``` DESCRIPTION diff --git a/archlinux/PKGBUILD b/archlinux/PKGBUILD index 8022832..1dc6e17 100644 --- a/archlinux/PKGBUILD +++ b/archlinux/PKGBUILD @@ -5,13 +5,14 @@ arch=(x86_64) pkgdesc=$(grep "Summary:" ./rpm_spec/qpdf-converter.spec.in | sed 's/Summary://' | xargs) url=$(git remote get-url origin) license=(GPL) -depends=(libreoffice graphicsmagick zenity poppler python-nautilus file net-tools) +makedepends=(git pandoc python-setuptools) +depends=(libreoffice graphicsmagick zenity poppler python-nautilus python-click python-pillow python-tqdm python-magic) build() { ln -s "$srcdir"/../ "$srcdir/src" } check(){ - src/dev_tools/run.sh + src/tests/all } package() { cd src diff --git a/ci/requirements.txt b/ci/requirements.txt new file mode 100644 index 0000000..f3aeabd --- /dev/null +++ b/ci/requirements.txt @@ -0,0 +1,6 @@ +# WARNING: those requirements are used only for travis-ci.org +# they SHOULD NOT be used under normal conditions; use system package manager +click +pillow +pylint +tqdm diff --git a/debian/changelog b/debian/changelog index ded4308..29f5345 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,9 @@ -qubes-pdf-converter (3.0.0-1) unstable; urgency=medium - - [ Neowutran ] - * Add support for libreoffice files type - * Add 
support for password protected files +qubes-pdf-converter (2.1.8-1) unstable; urgency=medium - -- Marek Marczykowski-Górecki Sat, 25 Apr 2020 20:41:05 +0100 + [ Frédéric Pierret (fepitre) ] + * Drop python2 + + -- Marek Marczykowski-Górecki Mon, 25 May 2020 04:16:24 +0200 qubes-pdf-converter (2.1.7-1) unstable; urgency=medium diff --git a/debian/control b/debian/control index 0a317bf..7ab7a64 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: qubes-pdf-converter Section: admin Priority: optional Maintainer: Jason Mehring -Build-Depends: pandoc, debhelper (>= 9) +Build-Depends: pandoc, python3-setuptools, debhelper (>= 9) # For the futures version of debian, delete the "compat" file, and add the line below in "Build-Depends" # debhelper-compat (= 12) Standards-Version: 4.5.0 @@ -11,5 +11,15 @@ Homepage: https://github.com/QubesOS/qubes-app-linux-pdf-converter Package: qubes-pdf-converter Section: admin Architecture: any -Depends: poppler-utils, net-tools, file, libreoffice, graphicsmagick, python-nautilus, ${misc:Depends} +Depends: + poppler-utils, + libreoffice, + graphicsmagick, + python3 (>= 3.7.0), + python3-nautilus | python-nautilus, + python3-click, + python3-pillow, + python3-tqdm, + python3-magic, + ${misc:Depends} Description: The Qubes service for converting untrusted PDF files into trusted ones diff --git a/debian/qubes-pdf-converter.install b/debian/qubes-pdf-converter.install index 7a6a94b..bd1bf02 100644 --- a/debian/qubes-pdf-converter.install +++ b/debian/qubes-pdf-converter.install @@ -1,4 +1,3 @@ -usr/lib/qubes/qpdf-convert-client usr/lib/qubes/qpdf-convert-server etc/qubes-rpc/qubes.PdfConvert usr/bin/qvm-convert-pdf @@ -7,3 +6,5 @@ usr/share/nautilus-python/extensions usr/share/nautilus-python/extensions/qvm_convert_pdf_nautilus.py usr/share/kde4/services/qvm-convert-pdf.desktop usr/share/man/man1/qvm-convert-pdf.1.gz +usr/lib/python3/dist-packages/qubespdfconverter 
+usr/lib/python3/dist-packages/qubespdfconverter-*.egg-info diff --git a/debian/rules b/debian/rules index 10181e5..d8dc1d2 100755 --- a/debian/rules +++ b/debian/rules @@ -3,6 +3,7 @@ # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 +export PYTHON_PREFIX_ARG=--install-layout=deb export DESTDIR=$(shell readlink -m .)/debian/tmp @@ -11,3 +12,7 @@ export DESTDIR=$(shell readlink -m .)/debian/tmp override_dh_auto_install: make install-vm + +override_dh_missing: + dh_missing --fail-missing + diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..c636d88 --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,2 @@ +Tests: all +Depends: @ diff --git a/dev_tools/run.sh b/dev_tools/run.sh deleted file mode 100755 index 548a177..0000000 --- a/dev_tools/run.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -trap 'echo "Conversion failed!" 1>&2' ERR -relative_directory=$(dirname "$0") -password=toor - -rm "$relative_directory"/files_success/*.trusted.pdf - -for file in "$relative_directory"/files_success/*; do - echo "Converting $file" - set -eE - $(coproc "$relative_directory"/../qpdf-convert-client "$file" "$password"; "$relative_directory"/../qpdf-convert-server <&"${COPROC[0]}" >&"${COPROC[1]}") - set +eE -done -sleep 10 && mv /home/user/QubesUntrustedPDFs/* "$relative_directory"/files_success/ - - - - -for file in "$relative_directory"/files_error/*; do - echo "Converting $file" - $(coproc "$relative_directory"/../qpdf-convert-client "$file" "$password"; "$relative_directory"/../qpdf-convert-server <&"${COPROC[0]}" >&"${COPROC[1]}") - error_code=$? 
- if [ $error_code -eq 0 ]; then - echo "The conversion should be failing" - exit 1 - fi -done - -echo "Everything seems to be OK" -exit 0 diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index b42a0f5..0000000 --- a/doc/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -PANDOC=pandoc -s -f markdown -t man - -DOCS=$(patsubst %.md,%.1.gz,$(wildcard *.md)) - -help: - @echo "make md=example.md preview -- generate manpage preview from example.md" - @echo "make manpages -- generate manpages" - @echo "make install -- generate manpages and copy them to /usr/share/man" - -install: manpages - install -d $(DESTDIR)/usr/share/man/man1 - install -D $(DOCS) $(DESTDIR)/usr/share/man/man1/ - -%.1: %.md - $(PANDOC) $< > $@ - -%.1.gz: %.1 - gzip -f $< - -manpages: $(DOCS) - -clean: - rm -f $(DOCS) diff --git a/doc/qvm-convert-pdf.md b/doc/qvm-convert-pdf.md deleted file mode 120000 index 32d46ee..0000000 --- a/doc/qvm-convert-pdf.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/qpdf-convert-client b/qpdf-convert-client deleted file mode 100755 index b6a7d4f..0000000 --- a/qpdf-convert-client +++ /dev/null @@ -1,149 +0,0 @@ -#!/bin/bash -# -# The Qubes OS Project, http://www.qubes-os.org -# -# Copyright (C) 2013 Joanna Rutkowska -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -INPUT_FILE=${1?Input file required} -PASSWORD=${2-Dummy} -RCVD_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX) -CONVERTED_FILE="$(dirname "$1")/$(basename "$1" .pdf).trusted.pdf" -CONVERTED_FILE_PARTIAL="$CONVERTED_FILE".part.pdf - -MAX_PAGES=10000 -MAX_IMG_WIDTH=10000 -MAX_IMG_HEIGHT=10000 -IMG_DEPTH=8 -MAX_IMG_SIZE=$((MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3)) - -VERBOSE=1 -if [ -n "$PROGRESS_FOR_GUI" ]; then - VERBOSE=0; -fi - -die() { - reason="$1" - if [ -n "$PROGRESS_FOR_GUI" ]; then - zenity --error --title="PDF conversion error" --text="$reason" - else - echo "$reason" >&2 - fi - exit 1 -} - - -# Send the input (untrusted) file to the server... -[ $VERBOSE -ge 1 ] && echo "-> Sending file to a Disposable VM..." >&2 -echo "$PASSWORD" -[ -n "$PROGRESS_FOR_GUI" ] && echo "1" || echo "0" -cat "$INPUT_FILE" -exec >&- - -# ... and get the recvd *simple* representation: - -# Note: the server might be compromised at this point so, it can very well send -# us something else than the simple representation. Thus we explicitly specify -# input format to GraphicsMagick's convert via "rgb:" prefix, forcing it to -# interpret whatever stream of bytes it gets on input as a simple RGB array. We -# hope that when using this RGB format explicitly (which is the simplest format -# for bitmaps in the known universe), there is no space for offending bug in -# image parsing... - -# First, get the no of pages: -read -r NO_PAGES -if [[ ! "$NO_PAGES" =~ ^[1-9][0-9]*$ ]] || [[ $NO_PAGES -le 0 ]] || [[ $NO_PAGES -gt $MAX_PAGES ]] ; then - die "The remote party return invalid no of pages, aborting!" -fi - -[ $VERBOSE -ge 1 ] && echo "-> Waiting for converted samples..." >&2 - -PAGE=1 -while [ $PAGE -le "$NO_PAGES" ]; do - read -r IMG_WIDTH IMG_HEIGHT - if [ $VERBOSE -eq 1 ]; then - echo -n "-> Receiving page $PAGE out of $NO_PAGES..." >&2 - printf "\r" >&2 - elif [ $VERBOSE -gt 1 ]; then - echo "-> Receiving page $PAGE out of $NO_PAGES..." >&2 - fi - if [[ ! 
"$IMG_WIDTH" =~ ^[1-9][0-9]*$ ]] || [ "$IMG_WIDTH" -le 0 ] || [ "$IMG_WIDTH" -gt $MAX_IMG_WIDTH ] || \ - [[ ! "$IMG_HEIGHT" =~ ^[1-9][0-9]*$ ]] || [ "$IMG_HEIGHT" -le 0 ] || [ "$IMG_HEIGHT" -gt $MAX_IMG_HEIGHT ]; then - die "The remote party return invalid image geometry info, aborting!" - fi - [ $VERBOSE -ge 2 ] && echo "--> page geometry: $IMG_WIDTH x $IMG_HEIGHT x $IMG_DEPTH" >&2 - IMG_SIZE=$((IMG_WIDTH*IMG_HEIGHT*3)) - if [ $IMG_SIZE -le 0 ] || [ $IMG_SIZE -gt $MAX_IMG_SIZE ]; then - die "Calculated image size is invalid, aborting!" - fi - # save the simplified RGB image into a temp PDF file: - RGB_FILE=$RCVD_FILE-$PAGE.rgb - PNG_FILE=$RCVD_FILE-$PAGE.png - PDF_FILE=$RCVD_FILE-$PAGE.pdf - head -c $IMG_SIZE > "$RGB_FILE" - RCVD_IMG_SIZE=$(stat -c %s "$RGB_FILE") - if [ "$RCVD_IMG_SIZE" -ne $IMG_SIZE ]; then - die "The remote party return invalid no of bytes of the RGB file, aborting!" - fi - # here, the important part is that we *explicitly* specify RGB as the input format via "rgb:" - # We first convert to a (compressed) PNG to create smaller output files - convert_msgs=$(gm convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILE" png:"$PNG_FILE" 2>&1) - # shellcheck disable=SC2181 - if [ $? -ne 0 ]; then - die "Page $PAGE conversion failed (RGB->PNG): $convert_msgs" - fi - rm -f "$RGB_FILE" - - # now convert the (trusted but compressed) PNG into PDF for easy assembly... - convert_msgs=$(gm convert "$PNG_FILE" "$PDF_FILE" 2>&1) - # shellcheck disable=SC2181 - if [ $? -ne 0 ]; then - die "Page $PAGE conversion failed (PNG->PDF): $convert_msgs" - fi - rm -f "$PNG_FILE" - - if [ $PAGE -gt 1 ]; then - convert_msgs=$(pdfunite "$CONVERTED_FILE" "$PDF_FILE" "$CONVERTED_FILE_PARTIAL" 2>&1) - # shellcheck disable=SC2181 - if [ $? 
-ne 0 ]; then - die "Error merging converted page: $convert_msgs" - fi - mv "$CONVERTED_FILE_PARTIAL" "$CONVERTED_FILE" || die - else - mv "$PDF_FILE" "$CONVERTED_FILE" || die - fi - rm -f "$PDF_FILE" || die - - PAGE=$((PAGE+1)) - - [ -n "$PROGRESS_FOR_GUI" ] && echo $(((PAGE - 1) * 90 / NO_PAGES)) >& "$SAVED_FD_1" -done - -if [ $VERBOSE -eq 1 ]; then - echo >&2 -fi - -[ $VERBOSE -ge 1 ] && echo "-> Converted PDF saved as: $CONVERTED_FILE" >&2 - -mkdir -p "$HOME/QubesUntrustedPDFs" -ORIG_FILE="$HOME/QubesUntrustedPDFs/$(basename "$INPUT_FILE")" -mv "$INPUT_FILE" "${ORIG_FILE}" || die "Moving original file failed" -[ $VERBOSE -ge 1 ] && echo "-> Original file saved as $ORIG_FILE" >&2 - -# Cleanup -rm -f "$RCVD_FILE"* -[ -n "$PROGRESS_FOR_GUI" ] && echo "100" >& "$SAVED_FD_1" -exit 0 diff --git a/qpdf-convert-server b/qpdf-convert-server deleted file mode 100755 index a9ff478..0000000 --- a/qpdf-convert-server +++ /dev/null @@ -1,156 +0,0 @@ -#!/bin/bash -# -# The Qubes OS Project, http://www.qubes-os.org -# -# Copyright (C) 2013 Joanna Rutkowska -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -INPUT_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX) -TEMP_PNG_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.png) -TEMP_RGB_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.pdf) -IMG_DEPTH=8 -read -r PASSWORD -read -r GUI - -read_password(){ - PASSWORD_SUCCESS=$1 - if [ "$PASSWORD_SUCCESS" -eq 0 ]; then - if [ "$GUI" -eq 1 ]; then - PASSWORD=$(zenity --title 'File protected by password' --password) - else - echo "Incorrect password" >&2 - exit 1 - fi - fi -} - -# Get the original (untrusted) PDF file... -cat > "$INPUT_FILE" - -# The project "Dangerzone" reused the idea of this script based on https://blog.invisiblethings.org/2013/02/21/converting-untrusted-pdfs-into-trusted.html : -# https://github.com/firstlookmedia/dangerzone-converter https://dangerzone.rocks/ https://github.com/firstlookmedia/dangerzone -# Dangerzone try to export the idea to non Qubes based system, and try to improve it. -# Both projects can improve the other. - -# I choose to not include the OCR function that have been added to Dangerzone. -# OCR lead to searchable text PDF file, so it improve the UX. -# It also increase the attack surface: the resulting PDF file is more complex. And OCR could be fooled to change some of the meaning of the text, however I do not truly understand -# the impact this has for the use case, I believe it is quite minor as the document will be read by a human who know this come from a possibly unreliable source. 
-# ( https://arxiv.org/abs/1802.05385 https://arxiv.org/abs/2002.03095 https://medium.com/@sharon.qian.10/adversarial-robustness-of-optical-character-recognition-ocr-91eedc36ef6 )) - -# Find the file mime type -MIMETYPE=$(file -b --mime-type "$INPUT_FILE") -PASSWORD_SUCCESS=0 - -# And convert it to a PDF file -case "$MIMETYPE" in -video/* | audio/*) - echo "The file type $MIMETYPE is currently not supported" >&2 - exit 1 - ;; -image/*) - ;; -application/pdf) - while [ $PASSWORD_SUCCESS -eq 0 ] ; do - pdfinfo -opw "$PASSWORD" -upw "$PASSWORD" "$INPUT_FILE" 2>&1 | grep "Incorrect password" >&2 - ERROR_CODE=$? - read_password $(( ERROR_CODE != 0 )) - done - ;; -*) - # Start libreoffice server - libreoffice --accept='socket,host=localhost,port=2202;urp;' --norestore --nologo --nodefault >/dev/null 2>/dev/null & - listener_notready=1 - - # Wait until libreoffice server is started - while [ $listener_notready -ne 0 ]; - do - sleep 1 - netstat -anop 2> /dev/null | grep '127.0.0.1:2202' | grep LISTEN >/dev/null 2>/dev/null - listener_notready=$? 
- done - - # Remove password from file using libreoffice API - while [ $PASSWORD_SUCCESS -eq 0 ] ; do - python3 -c ' -import os -import uno -from com.sun.star.beans import PropertyValue -import sys - -src="file://'"$INPUT_FILE"'" -dst="file://'"$INPUT_FILE.nopassword"'" -password="'"$PASSWORD"'" - -localContext = uno.getComponentContext() -resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext) -ctx = resolver.resolve("uno:socket,host=localhost,port=2202;urp;StarOffice.ComponentContext") -smgr = ctx.ServiceManager -desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) - -hidden_property = PropertyValue() -hidden_property.Name = "Hidden" -hidden_property.Value = True - -password_property = PropertyValue() -password_property.Name = "Password" -password_property.Value = password - -document = desktop.loadComponentFromURL(src, "_blank", 0, (password_property, hidden_property,)) -document.storeAsURL(dst, ())' >&2 - ERROR_CODE=$? - read_password $(( ERROR_CODE == 0 )) - done - libreoffice --convert-to pdf "$INPUT_FILE.nopassword" --outdir /tmp/ >&2 - mv "$INPUT_FILE"".pdf" "$INPUT_FILE" - ;; -esac -# now, let's convert it into a simple representation, -# and send back to the client. - -# Note that we might be compromised at this point (due to exploitation of PDF -# parsing code) and so what we're sending back might very well be something -# totally different than a decent simple representation -- the client should -# never trust what we're sending back, and should discard anything that doesn't -# look like the simple representation! - -NO_PAGES=$(pdfinfo -opw "$PASSWORD" -upw "$PASSWORD" "$INPUT_FILE" | grep -a "^Pages:" | sed -e "s/^Pages:[^0-9]*//") -if [ -z "$NO_PAGES" ]; then - # Perhaps this is not a PDF, only some JPG/PNG/etc? Let's try it anyway... 
- NO_PAGES=1 -fi -echo $NO_PAGES - -cd /tmp || exit 1 -PAGE=1 -while [ $PAGE -le $NO_PAGES ]; do - # if pdftocairo fails, lets try the GraphicsMagick's convert -- perhaps this is just some img file? - pdftocairo -opw "$PASSWORD" -upw "$PASSWORD" "$INPUT_FILE" -png -f $PAGE -l $PAGE -singlefile "$(basename "$TEMP_PNG_FILE" .png)" || \ - gm convert "$INPUT_FILE" png:"$TEMP_PNG_FILE" - IMG_WIDTH=$(identify -format "%w" "$TEMP_PNG_FILE") - IMG_HEIGHT=$(identify -format "%h" "$TEMP_PNG_FILE") - gm convert "$TEMP_PNG_FILE" -depth $IMG_DEPTH rgb:"$TEMP_RGB_FILE" - echo "$IMG_WIDTH $IMG_HEIGHT" - cat "$TEMP_RGB_FILE" - PAGE=$((PAGE + 1)) -done - -# Cleanup tmp files... -# Note: our DispVM might get destroyed before the commands below -# complete, but that doesn't hurt us, because this is... well a DispVM. -rm -f "$INPUT_FILE" -rm -f "$TEMP_PNG_FILE" -rm -f "$TEMP_RGB_FILE" diff --git a/qubespdfconverter/__init__.py b/qubespdfconverter/__init__.py index a178c84..e69de29 100644 --- a/qubespdfconverter/__init__.py +++ b/qubespdfconverter/__init__.py @@ -1 +0,0 @@ -# pylint: no-file diff --git a/qubespdfconverter/client.py b/qubespdfconverter/client.py new file mode 100755 index 0000000..79c016e --- /dev/null +++ b/qubespdfconverter/client.py @@ -0,0 +1,702 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# The Qubes OS Project, http://www.qubes-os.org +# +# Copyright (C) 2013 Joanna Rutkowska +# Copyright (C) 2020 Jason Phan +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import asyncio +import functools +import logging +import shutil +import signal +import subprocess +import sys +from enum import Enum, auto +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from PIL import Image +import tqdm +import click + +def get_dispvm_template(): + try: + config_file = open("/rw/config/PdfConvert_dispvm", "r") + return config_file.readline().splitlines()[0] + except: + return None + +CLIENT_VM_CMD = ["/usr/bin/qrexec-client-vm", "@dispvm", "qubes.PdfConvert"] +DISPVM_TEMPLATE = get_dispvm_template() +if DISPVM_TEMPLATE: + CLIENT_VM_CMD = ["/usr/bin/qrexec-client-vm", "@dispvm:"+DISPVM_TEMPLATE, "qubes.PdfConvert"] + +MAX_PAGES = 10000 +MAX_IMG_WIDTH = 10000 +MAX_IMG_HEIGHT = 10000 +DEPTH = 8 + +ERROR_LOGS = asyncio.Queue() + + +class Status(Enum): + """Sanitization job status""" + DONE = auto() + FAIL = auto() + CANCELLED = auto() + + +@dataclass(frozen=True) +class ImageDimensions: + width: int + height: int + size: int + depth: int = DEPTH + + +class DimensionError(ValueError): + """Raised if invalid image dimensions were received""" + + +class PageError(ValueError): + """Raised if an invalid number of pages was received""" + + +class QrexecError(Exception): + """Raised if a qrexec-related error occured""" + + +class RepresentationError(Exception): + """Raised if an representation-related error occurred""" + + +class BadPath(click.BadParameter): + """Raised if a Path object parsed by Click is invalid""" + def __init__(self, path, message): + super().__init__(message, param_hint=f'"{path}"') + + +async def sigint_handler(tasks): + await asyncio.gather(*[cancel_task(t) for t in tasks]) + + +def modify_click_errors(func): + """Decorator for replacing Click behavior on errors""" 
+ + def show(self, file=None): + """Removes usage message from UsageError error messages""" + color = None + + if file is None: + file = click._compat.get_text_stderr() + + if self.ctx is not None: + color = self.ctx.color + + click.echo(f"{self.format_message()}", file=file, color=color) + + + def format_message(self): + """Removes 'Invalid value' from BadParameter error messages""" + if self.param_hint is not None: + prefix = self.param_hint + elif self.param is not None: + prefix = self.param.get_error_hint(self.ctx) + else: + return self.message + prefix = click.exceptions._join_param_hints(prefix) + + return f"{prefix}: {self.message}" + + click.exceptions.BadParameter.format_message = format_message + click.exceptions.UsageError.show = show + + return func + + +def validate_paths(ctx, param, untrusted_paths): + """Callback for validating file paths parsed by Click""" + for untrusted_path in untrusted_paths: + if not untrusted_path.resolve().exists(): + raise BadPath(untrusted_path, "No such file or directory") + + if not untrusted_path.resolve().is_file(): + raise BadPath(untrusted_path, "Not a regular file") + + try: + with untrusted_path.resolve().open("rb"): + pass + except PermissionError as e: + raise BadPath(untrusted_path, "Not readable") from e + + paths = untrusted_paths + return paths + + +async def cancel_task(task): + task.cancel() + try: + await task + except: + pass + + +async def terminate_proc(proc): + if proc.returncode is None: + proc.terminate() + await proc.wait() + + +async def wait_proc(proc, cmd): + try: + await proc.wait() + except asyncio.CancelledError: + await terminate_proc(proc) + raise + + if proc.returncode: + raise subprocess.CalledProcessError(proc.returncode, cmd) + + +async def send(proc, data): + """Qrexec wrapper for sending data to the server""" + if isinstance(data, (int, str)): + data = str(data).encode() + + proc.stdin.write(data) + await proc.stdin.drain() + + +async def recv_b(proc, size): + """Qrexec wrapper for 
receiving binary data from the server""" + return await proc.stdout.readexactly(size) + + +async def recvline(proc): + """Qrexec wrapper for receiving a line of text data from the server""" + untrusted_data = await proc.stdout.readline() + if not untrusted_data: + raise EOFError + return untrusted_data.decode("ascii").rstrip() + + +class Tqdm(tqdm.tqdm): + def set_status(self, status): + prefix = self.desc[:self.desc.rfind('.') + 1] + self.set_description_str(prefix + status) + self.refresh() + + + def set_job_status(self, status): + self.set_status(status.name.lower()) + + +class Representation: + """Umbrella object for a file's initial and final representations + + The initial representation must be of a format such that if it contains + malicious code/data, such code/data is excluded from the final + representation upon conversion. Generally, this restricts the initial + representation to a relatively simple format (e.g., RGB bitmap). + + The final representation can be of any format you'd like, provided that + the initial representation's format was properly selected (e.g., PNG). + + :param prefix: Path prefixes for representations + :param f_suffix: File extension of initial representation (without .) + :param i_suffix: File extension of final representation (without .) 
+ """ + + def __init__(self, prefix, i_suffix, f_suffix): + """ + :param initial: File path to initial representation + :param final: File path final representation + :param dim: Image dimensions received from the server + """ + self.initial = prefix.with_suffix(f".{i_suffix}") + self.final = prefix.with_suffix(f".{f_suffix}") + self.dim = None + + + async def convert(self, bar): + """Convert initial representation into final representation + + :param bar: Progress bar to update upon completion + """ + cmd = [ + "gm", + "convert", + "-size", + f"{self.dim.width}x{self.dim.height}", + "-depth", + f"{self.dim.depth}", + f"rgb:{self.initial}", + f"png:{self.final}" + ] + + proc = await asyncio.create_subprocess_exec(*cmd) + + try: + await wait_proc(proc, cmd) + except subprocess.CalledProcessError as e: + raise RepresentationError("Failed to convert representation") from e + + await asyncio.get_running_loop().run_in_executor( + None, + self.initial.unlink + ) + + bar.update(1) + bar.set_status(f"{bar.n}/{bar.total}") + + + async def receive(self, proc): + """Receive initial representation from the server + + :param proc: qrexec-client-vm process + """ + try: + self.dim = await self._dim(proc) + except EOFError as e: + raise QrexecError("Failed to receive image dimensions") from e + except (AttributeError, UnicodeError, ValueError) as e: + raise DimensionError("Invalid image dimensions") from e + + try: + data = await recv_b(proc, self.dim.size) + except asyncio.IncompleteReadError as e: + raise QrexecError("Received inconsistent number of bytes") from e + + await asyncio.get_running_loop().run_in_executor( + None, + self.initial.write_bytes, + data + ) + + + async def _dim(self, proc): + """Receive and compute image dimensions for initial representation + + :param proc: qrexec-client-vm process + """ + untrusted_w, untrusted_h = map(int, (await recvline(proc)).split(" ", 1)) + + if 1 <= untrusted_w <= MAX_IMG_WIDTH and 1 <= untrusted_h <= MAX_IMG_HEIGHT: + width = 
untrusted_w + height = untrusted_h + size = width * height * 3 + else: + raise ValueError + return ImageDimensions(width, height, size) + + +@dataclass(frozen=True) +class BatchEntry: + task: asyncio.Task + rep: Representation + + +class BaseFile: + """An unsanitized file + + :param path: Path to original, unsanitized file + :param pagenums: Number of pages in original file + :param pdf: Path to temporary final PDf + """ + + def __init__(self, path, pagenums, pdf): + """ + :param path: @path + :param pagenums: @pagenums + :param batch: Conversion queue + """ + self.path = path + self.pagenums = pagenums + self.pdf = pdf + self.batch = None + + + async def sanitize(self, proc, bar, depth): + """Receive and convert representation files + + :param archive: Path to archive directory + :param depth: Conversion queue size + :param in_place: Value of --in-place flag + """ + self.batch = asyncio.Queue(depth) + + publish_task = asyncio.create_task(self._publish(proc, bar)) + consume_task = asyncio.create_task(self._consume()) + + try: + await asyncio.gather(publish_task, consume_task) + finally: + if not publish_task.done(): + await cancel_task(publish_task) + + if not consume_task.done(): + await cancel_task(consume_task) + + while not self.batch.empty(): + batch_e = await self.batch.get() + await cancel_task(batch_e.task) + self.batch.task_done() + + + async def _publish(self, proc, bar): + """Receive initial representations and start their conversions""" + pages = [] + + for page in range(1, self.pagenums + 1): + rep = Representation(Path(self.pdf.parent, str(page)), "rgb", "png") + await rep.receive(proc) + + task = asyncio.create_task(rep.convert(bar)) + batch_e = BatchEntry(task, rep) + + try: + await self.batch.put(batch_e) + except asyncio.CancelledError: + await cancel_task(task) + raise + + pages.append(page) + + if page % self.batch.maxsize == 0 or page == self.pagenums: + await self.batch.join() + await self._save_reps(pages) + pages = [] + + + async def 
_consume(self): + """Convert initial representations to final form and save as PDF""" + for _ in range(1, self.pagenums + 1): + batch_e = await self.batch.get() + await batch_e.task + self.batch.task_done() + + + async def _save_reps(self, pages): + """Save final representations to a PDF file""" + images = [] + + for page in pages: + try: + images.append( + await asyncio.get_running_loop().run_in_executor( + None, + Image.open, + Path(self.pdf.parent, f"{page}.png") + ) + ) + except IOError as e: + for image in images: + await asyncio.get_running_loop().run_in_executor( + None, + image.close + ) + raise RepresentationError("Failed to open representation") from e + + try: + await asyncio.get_running_loop().run_in_executor( + None, + functools.partial(images[0].save, + self.pdf, + "PDF", + resolution=100, + append=self.pdf.exists(), + append_images=images[1:], + save_all=True) + ) + except IOError as e: + raise RepresentationError("Failed to save representation") from e + finally: + for image, page in zip(images, pages): + await asyncio.get_running_loop().run_in_executor( + None, + image.close + ) + await asyncio.get_running_loop().run_in_executor( + None, + Path(self.pdf.parent, f"{page}.png").unlink + ) + + +class Job: + """A sanitization job + + :param path: Path to original, unsanitized file + :param pos: Bar position + """ + + def __init__(self, path, pos): + """ + + :param file: Base file + :param bar: Progress bar + :param proc: qrexec-client-vm process + :param pdf: Path to temporary PDF for appending representations + """ + self.path = path + self.bar = Tqdm(desc=f"{path}...0/?", + bar_format=" {desc}", + position=pos) + self.base = None + self.proc = None + self.pdf = None + self.password = None + self.gui = False + + + async def run(self, archive, depth, in_place, password, gui): + self.password = "" if password is None else password + self.gui = gui + + + self.proc = await asyncio.create_subprocess_exec( + *CLIENT_VM_CMD, + stdin=asyncio.subprocess.PIPE, 
+ stdout=asyncio.subprocess.PIPE + ) + + with TemporaryDirectory(prefix="qvm-sanitize-") as tmpdir: + try: + await self._setup(tmpdir) + await self._start(archive, depth, in_place) + except (OSError, + PageError, + QrexecError, + DimensionError, + RepresentationError, + subprocess.CalledProcessError) as e: + # Since the qrexec-client-vm subprocesses belong to the same + # process group, when a SIGINT is issued, it's sent to each one. + # Consequently, there's a race between the signal and our + # cleanup code. Occasionally, the signal wins and causes some + # qrexec-client-vm subprocesses to exit, potentially during an + # operation (e.g., a STDOUT read), thereby raising an exception + # not expected by the cleanup code. + if self.proc.returncode == -signal.SIGINT: + self.bar.set_job_status(Status.CANCELLED) + raise asyncio.CancelledError + + self.bar.set_job_status(Status.FAIL) + await ERROR_LOGS.put(f"{self.path.name}: {e}") + if self.proc.returncode is not None: + await terminate_proc(self.proc) + raise + except asyncio.CancelledError: + self.bar.set_job_status(Status.CANCELLED) + raise + + self.bar.set_job_status(Status.DONE) + + + async def _setup(self, tmpdir): + send_task = asyncio.create_task(self._send()) + page_task = asyncio.create_task(self._pagenums()) + + try: + _, pagenums = await asyncio.gather(send_task, page_task) + except QrexecError: + await cancel_task(page_task) + raise + else: + try: + self.bar.reset(total=pagenums) + except AttributeError: + self.bar.total = pagenums + self.bar.refresh() + + self.pdf = Path(tmpdir, self.path.with_suffix(".trusted.pdf").name) + self.base = BaseFile(self.path, pagenums, self.pdf) + + + async def _start(self, archive, depth, in_place): + await self.base.sanitize( + self.proc, + self.bar, + depth + ) + await wait_proc(self.proc, CLIENT_VM_CMD) + + await asyncio.get_running_loop().run_in_executor( + None, + shutil.move, + self.pdf, + Path(self.path.parent, self.pdf.name) + ) + + if in_place: + try: + await 
asyncio.get_running_loop().run_in_executor( + None, + self.path.unlink + ) + except FileNotFoundError: + pass + else: + await asyncio.get_running_loop().run_in_executor( + None, + self._archive, + archive + ) + + + async def _send(self): + """Send original document to server""" + data = await asyncio.get_running_loop().run_in_executor( + None, + self.path.read_bytes + ) + gui_msg = "1" if self.gui else "0" + try: + await send(self.proc, self.password + "\n") + await send(self.proc, gui_msg + "\n") + await send(self.proc, data) + except BrokenPipeError as e: + raise QrexecError("Failed to send PDF") from e + else: + self.proc.stdin.write_eof() + + + async def _pagenums(self): + """Receive number of pages in original document from server""" + try: + untrusted_pagenums = int(await recvline(self.proc)) + except (AttributeError, EOFError, UnicodeError, ValueError) as e: + raise QrexecError("Failed to receive page count") from e + + if 1 <= untrusted_pagenums <= MAX_PAGES: + pagenums = untrusted_pagenums + else: + raise PageError("Invalid page count") + + return pagenums + + + def _archive(self, archive): + """Move original file into an archival directory""" + Path.mkdir(archive, exist_ok=True) + self.path.rename(Path(archive, self.path.name)) + + +async def run(params): + suffix = "s" if len(params["files"]) > 1 else "" + print(f"Sending file{suffix} to Disposable VM{suffix}...\n") + tasks = [] + jobs = [Job(f, i) for i, f in enumerate(params["files"])] + for job in jobs: + tasks.append(asyncio.create_task(job.run(params["archive"], + params["batch"], + params["in_place"], + params["password"], + params["gui"]))) + + asyncio.get_running_loop().add_signal_handler( + signal.SIGINT, + lambda: asyncio.ensure_future(sigint_handler(tasks)) + ) + + results = await asyncio.gather(*tasks, return_exceptions=True) + completed = results.count(None) + + for job in jobs: + job.bar.close() + + if ERROR_LOGS.empty(): + if tqdm.__version__ >= "4.34.0": + newlines = "\n" + else: + 
newlines = "\n" if len(jobs) == 1 else "\n" * (len(jobs) + 1) + else: + newlines = "\n" + + if tqdm.__version__ >= "4.34.0": + print() + else: + if len(jobs) == 1: + print() + else: + print("\n" * len(jobs)) + + while not ERROR_LOGS.empty(): + err_msg = await ERROR_LOGS.get() + logging.error(err_msg) + ERROR_LOGS.task_done() + + print(f"{newlines}Total Sanitized Files: {completed}/{len(results)}") + + return completed != len(results) + + +@click.command() +@click.option( + "-b", + "--batch", + type=click.IntRange(1), + default=50, + metavar="SIZE", + help="Maximum number of conversion tasks" +) +@click.option( + "-a", + "--archive", + type=Path, + default=Path(Path.home(), "QubesUntrustedPDFs"), + metavar="PATH", + help="Directory for storing archived files" +) +@click.option( + "-i", + "--in-place", + is_flag=True, + help="Replace original files instead of archiving them" +) +@click.option( + "-g", + "--gui", + is_flag=True, + help="Allow GUI popup to be displayed" +) +@click.option( + "-p", + "--password", + help="Password for reading the file." 
+) +@click.argument( + "files", + type=Path, + nargs=-1, + callback=validate_paths, + metavar="[FILES ...]" +) +@modify_click_errors +def main(**params): + logging.basicConfig(format="error: %(message)s") + + if params["files"]: + loop = asyncio.get_event_loop() + sys.exit(loop.run_until_complete(run(params))) + else: + print("No files to sanitize.") + + +if __name__ == "__main__": + main() diff --git a/qubespdfconverter/server.py b/qubespdfconverter/server.py new file mode 100755 index 0000000..3e4561e --- /dev/null +++ b/qubespdfconverter/server.py @@ -0,0 +1,431 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# The Qubes OS Project, http://www.qubes-os.org +# +# Copyright (C) 2013 Joanna Rutkowska +# Copyright (C) 2020 Jason Phan +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +########################### +# The project "Dangerzone" reused the idea of this script based on: +# https://blog.invisiblethings.org/2013/02/21/converting-untrusted-pdfs-into-trusted.html +# +# - https://github.com/firstlookmedia/dangerzone-converter +# - https://dangerzone.rocks/ +# - https://github.com/firstlookmedia/dangerzone +# +# Dangerzone try to export the idea to non Qubes based system, and try to improve it. +# Both projects can improve the other. 
###########################

import asyncio
import subprocess
import sys
import os
import socket
import time
from pathlib import Path
from dataclasses import dataclass
from tempfile import TemporaryDirectory
import magic
import uno
from com.sun.star.beans import PropertyValue

DEPTH = 8
STDIN_READ_SIZE = 65536


def unlink(path):
    """Wrapper for pathlib.Path.unlink(path, missing_ok=True)"""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


async def cancel_task(task):
    """Cancel a task and wait for it to finish, ignoring how it ended"""
    task.cancel()
    try:
        await task
    except (asyncio.CancelledError, Exception):
        # Best-effort cleanup: the task's own outcome is irrelevant here.
        # Unlike a bare "except:", KeyboardInterrupt and SystemExit still
        # propagate.
        pass


async def terminate_proc(proc):
    """Terminate a subprocess if it is still running and wait for it to exit"""
    if proc.returncode is None:
        proc.terminate()
        await proc.wait()


async def wait_proc(proc, cmd):
    """Wait for a subprocess and raise if it exited with a non-zero status

    :param proc: asyncio subprocess to wait for
    :param cmd: Command line reported in the raised CalledProcessError
    :raises subprocess.CalledProcessError: On a non-zero return code
    """
    try:
        await proc.wait()
    except asyncio.CancelledError:
        await terminate_proc(proc)
        raise

    if proc.returncode:
        raise subprocess.CalledProcessError(proc.returncode, cmd)


def send_b(data):
    """Qrexec wrapper for sending binary data to the client"""
    if isinstance(data, (str, int)):
        data = str(data).encode()

    sys.stdout.buffer.write(data)
    sys.stdout.buffer.flush()


def send(data):
    """Qrexec wrapper for sending text data to the client"""
    print(data, flush=True)


def recv_b():
    """Qrexec wrapper for receiving binary data from the client

    :raises EOFError: If stdin yields no data at all
    """
    untrusted_data = sys.stdin.buffer.read()
    if not untrusted_data:
        raise EOFError
    return untrusted_data


def _wait_for_port(host, port, delay=1):
    """Block until a TCP connection to host:port succeeds

    Each probe uses its own socket, which is closed straight away.
    """
    while True:
        try:
            with socket.create_connection((host, port), timeout=1):
                return
        except OSError:
            time.sleep(delay)


class Representation:
    """Umbrella object for a file's initial and final representations

    The initial representation must be of a format from which we can derive
    the final representation without breaking any of its requirements.
    Generally, this makes the initial representation some sort of image file
    (e.g. PNG, JPEG).

    The final representation must be of a format such that if the initial
    representation contains malicious code/data, such code/data is excluded
    from the final representation upon conversion. Generally, this makes the
    final representation a relatively simple format (e.g., RGB bitmap).

    :param path: Path to original, unsanitized file
    :param prefix: Path prefix for representations
    :param i_suffix: File extension of initial representation (without .)
    :param f_suffix: File extension of final representation (without .)
    """

    def __init__(self, path, prefix, i_suffix, f_suffix):
        self.path = path
        self.page = prefix.name  # the prefix's file name is the page number
        self.initial = prefix.with_suffix(f".{i_suffix}")
        self.final = prefix.with_suffix(f".{f_suffix}")
        self.dim = None

    async def convert(self, password):
        """Convert initial representation to final representation

        Creates the initial representation, records its dimensions in
        self.dim, then converts it; the initial file is removed afterwards
        whether or not the conversion succeeded.

        :param password: Password used to open the original file
        """
        cmd = [
            "gm",
            "convert",
            str(self.initial),
            "-depth",
            str(DEPTH),
            f"rgb:{self.final}"
        ]

        await self.create_irep(password)
        self.dim = await self._dim()

        proc = await asyncio.create_subprocess_exec(*cmd)
        try:
            await wait_proc(proc, cmd)
        finally:
            await asyncio.get_running_loop().run_in_executor(
                None,
                unlink,
                self.initial
            )

    async def create_irep(self, password):
        """Create initial representation

        Tries pdftocairo for the page first; if that command fails, falls
        back to converting the original file with GraphicsMagick.

        :param password: Owner/user password passed to pdftocairo
        """
        cmd = [
            "pdftocairo",
            "-opw",
            str(password),
            "-upw",
            str(password),
            str(self.path),
            "-png",
            "-f",
            str(self.page),
            "-l",
            str(self.page),
            "-singlefile",
            str(Path(self.initial.parent, self.initial.stem))
        ]

        proc = await asyncio.create_subprocess_exec(*cmd)
        try:
            await wait_proc(proc, cmd)
        except subprocess.CalledProcessError:
            cmd = [
                "gm",
                "convert",
                str(self.path),
                "png:"+str(self.initial)
            ]
            proc = await asyncio.create_subprocess_exec(*cmd)
            await wait_proc(proc, cmd)

    async def _dim(self):
        """Identify image dimensions of initial representation

        :return: First output line of ``gm identify -format "%w %h"``
        """
        cmd = ["gm", "identify", "-format", "%w %h", str(self.initial)]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=subprocess.PIPE
        )

        try:
            output, _ = await proc.communicate()
        except asyncio.CancelledError:
            await terminate_proc(proc)
            raise

        return output.partition(b"\n")[0].decode("ascii")


@dataclass(frozen=True)
class BatchEntry:
    """A queued conversion: the running task and the representation it converts"""
    task: asyncio.Task
    rep: Representation


class BaseFile:
    """Unsanitized file"""

    def __init__(self, path, password, gui):
        self.path = path
        self.password = password
        self.gui = gui
        self.pagenums = 0
        self.batch = None

    def _read_password(self, password_success):
        """Ask for a new password after a failed attempt

        Does nothing when password_success is true. Otherwise prompts through
        zenity when GUI popups are allowed.

        :raises ValueError: If the password was wrong and no GUI is allowed
        """
        if not password_success:
            if self.gui:
                cmd = ["zenity", "--title", "File protected by password", "--password"]
                # Keep the password a str, like the one received from the
                # client: callers pass it through str(), which would turn
                # bytes into the literal text "b'...'".
                self.password = subprocess.run(cmd, capture_output=True, check=True)\
                    .stdout.split(b"\n")[0].decode("utf-8")
            else:
                # TODO doesn't correctly close/kill the client
                raise ValueError("Incorrect password")

    def _decrypt(self):
        """
        Try to remove the password of a libreoffice-compatible file,
        and store the resulting file in INITIAL_NAME.nopassword
        """

        src = "file://"+str(self.path)
        dst = "file://"+str(self.path)+".nopassword"

        local_context = uno.getComponentContext()
        resolver = local_context.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver",
            local_context
        )
        ctx = resolver.resolve(
            "uno:socket,host=localhost,port=2202;urp;StarOffice.ComponentContext"
        )
        smgr = ctx.ServiceManager
        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        hidden_property = PropertyValue()
        hidden_property.Name = "Hidden"
        hidden_property.Value = True

        password_property = PropertyValue()
        password_property.Name = "Password"
        password_property.Value = self.password

        document = desktop.loadComponentFromURL(
            src,
            "_blank",
            0,
            (password_property, hidden_property,)
        )
        document.storeAsURL(dst, ())

    def _unlock_pdf(self):
        """Loop until pdfinfo accepts the current password

        :raises subprocess.CalledProcessError: If pdfinfo fails for any reason
                                               other than a wrong password
        """
        while True:
            cmd = ["pdfinfo", "-opw", self.password, "-upw", self.password, str(self.path)]
            # No check=True here: a failing pdfinfo must not raise before
            # its stderr has been inspected for the wrong-password message.
            # NOTE(review): this assumes pdfinfo exits non-zero on a wrong
            # password - confirm against the installed poppler.
            result = subprocess.run(cmd, capture_output=True, check=False)
            if b"Incorrect password" not in result.stderr:
                result.check_returncode()
                return
            self._read_password(False)

    def _office_to_pdf(self):
        """Decrypt the file through LibreOffice and convert it to PDF in place"""
        # Performance could be improved by only starting
        # the libreoffice when needed (aka: when the file need to be decrypted).
        # But code is simpler that way

        # Launch libreoffice server
        cmd = [
            "libreoffice",
            "--accept=socket,host=localhost,port=2202;urp;",
            "--norestore",
            "--nologo",
            "--nodefault"
        ]
        libreoffice_process = subprocess.Popen(cmd, stderr=subprocess.DEVNULL)

        try:
            # Wait until libreoffice server is ready
            _wait_for_port("127.0.0.1", 2202)

            # Remove password from file using libreoffice API
            while True:
                try:
                    self._decrypt()
                    break
                except Exception:
                    self._read_password(False)
        finally:
            # Stop and reap the server even when giving up on the password
            libreoffice_process.terminate()
            libreoffice_process.wait()

        cmd = [
            "libreoffice",
            "--convert-to",
            "pdf",
            str(self.path) + ".nopassword",
            "--outdir",
            str(self.path.parents[0])
        ]
        subprocess.run(cmd, capture_output=True, check=True)
        os.rename(str(self.path) + ".pdf", str(self.path))

    async def sanitize(self):
        """Start sanitization tasks

        Images are converted as they are, PDFs are first unlocked with the
        password, and every other non-media type goes through LibreOffice.
        The page count is then sent to the client, followed by every page.

        :raises ValueError: For audio/video input or an unrecoverable password
        :raises subprocess.CalledProcessError: If a conversion command fails
        """
        mimetype = magic.detect_from_filename(str(self.path)).mime_type
        if mimetype.startswith("video/") or mimetype.startswith("audio/"):
            raise ValueError("Cannot convert media to PDF")

        if mimetype.startswith("image/"):
            pass
        elif mimetype == "application/pdf":
            self._unlock_pdf()
        else:
            self._office_to_pdf()

        self.pagenums = self._pagenums()
        self.batch = asyncio.Queue(self.pagenums)

        send(self.pagenums)

        publish_task = asyncio.create_task(self._publish())
        consume_task = asyncio.create_task(self._consume())

        try:
            await asyncio.gather(publish_task, consume_task)
        except subprocess.CalledProcessError:
            await cancel_task(publish_task)
            await cancel_task(consume_task)

            # The queue holds BatchEntry objects; it is the entry's task that
            # must be cancelled.
            while not self.batch.empty():
                batch_e = await self.batch.get()
                await cancel_task(batch_e.task)
                self.batch.task_done()

            raise

    def _pagenums(self):
        """Return the number of pages in the suspect file

        Falls back to 1 when pdfinfo fails or prints no "Pages:" line.
        """
        cmd = ["pdfinfo", "-opw", self.password, "-upw", self.password, str(self.path)]
        try:
            output = subprocess.run(cmd, capture_output=True, check=True)
        except subprocess.CalledProcessError:
            return 1

        for line in output.stdout.decode().splitlines():
            if "Pages:" in line:
                return int(line.split(":")[1])

        return 1

    async def _publish(self):
        """Extract initial representations and enqueue conversion tasks"""
        for page in range(1, self.pagenums + 1):
            rep = Representation(
                self.path,
                Path(self.path.parent, str(page)),
                "png",
                "rgb"
            )
            task = asyncio.create_task(rep.convert(self.password))
            batch_e = BatchEntry(task, rep)
            # Wait until every previously queued entry has been processed
            await self.batch.join()

            try:
                await self.batch.put(batch_e)
            except asyncio.CancelledError:
                await cancel_task(task)
                raise

    async def _consume(self):
        """Await conversion tasks and send final representation to client"""
        loop = asyncio.get_running_loop()

        for _ in range(self.pagenums):
            batch_e = await self.batch.get()
            await batch_e.task

            rgb_data = await loop.run_in_executor(
                None,
                batch_e.rep.final.read_bytes
            )

            await loop.run_in_executor(None, unlink, batch_e.rep.final)
            # Dimensions line first, then the raw pixel data
            await loop.run_in_executor(None, send, batch_e.rep.dim)
            send_b(rgb_data)

            self.batch.task_done()


def main():
    """Read the request from stdin and stream the sanitized pages to stdout

    Input layout: password line, GUI flag line ("1" enables popups), then the
    raw document bytes.
    """
    try:
        data = recv_b()
    except EOFError:
        sys.exit(1)
    password_data = data.partition(b"\n")
    password = password_data[0].decode("utf-8")
    gui_data = password_data[2].partition(b"\n")
    gui = gui_data[0] == b"1"
    data = gui_data[2]

    with TemporaryDirectory(prefix="qvm-sanitize") as tmpdir:
        pdf_path = Path(tmpdir, "original")
        pdf_path.write_bytes(data)
        base = BaseFile(pdf_path, password, gui)

        try:
            # asyncio.run() creates and closes its own loop; get_event_loop()
            # is deprecated when no loop is running.
            asyncio.run(base.sanitize())
        except subprocess.CalledProcessError:
            sys.exit(1)


if __name__ == "__main__":
    main()


# Patch metadata from the collapsed source, kept as a comment:
# diff --git a/qubespdfconverter/tests.py b/qubespdfconverter/tests/__init__.py similarity index
100% rename from qubespdfconverter/tests.py rename to qubespdfconverter/tests/__init__.py diff --git a/qvm-convert-pdf b/qvm-convert-pdf deleted file mode 100755 index 8a1261b..0000000 --- a/qvm-convert-pdf +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# -# The Qubes OS Project, http://www.qubes-os.org -# -# Copyright (C) 2013 Joanna Rutkowska -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# -# - -if [ $# -lt 1 ]; then - echo "File is required" >&2 - exit 1 -fi -DISPVM=$(cat /rw/config/PdfConvert_dispvm 2> /dev/null) -if [ -n "$DISPVM" ]; then - DISPVM=":$DISPVM" -fi -exec /usr/bin/qrexec-client-vm "@dispvm$DISPVM" qubes.PdfConvert /usr/lib/qubes/qpdf-convert-client "$@" diff --git a/qvm-convert-pdf.gnome b/qvm-convert-pdf.gnome index c6aa986..d61f639 100755 --- a/qvm-convert-pdf.gnome +++ b/qvm-convert-pdf.gnome @@ -20,5 +20,8 @@ # # -export PROGRESS_FOR_GUI="yes" -/usr/bin/qvm-convert-pdf "$@" | zenity --progress --text="Converting PDF using Disposable VM..." --auto-close --auto-kill +if [ $# -ne 1 ]; then +exit 1 +fi + +/usr/bin/qvm-convert-pdf -g "$@" | zenity --progress --text="Converting PDF using Disposable VM..." 
--auto-close --auto-kill diff --git a/qvm_convert_pdf_nautilus.py b/qvm_convert_pdf_nautilus.py old mode 100755 new mode 100644 diff --git a/rpm_spec/qpdf-converter.spec.in b/rpm_spec/qpdf-converter.spec.in index 35f2331..a6dff58 100644 --- a/rpm_spec/qpdf-converter.spec.in +++ b/rpm_spec/qpdf-converter.spec.in @@ -34,9 +34,16 @@ License: GPL URL: https://github.com/QubesOS/qubes-app-linux-pdf-converter BuildRequires: pandoc +BuildRequires: python%{python3_pkgversion}-setuptools +BuildRequires: python%{python3_pkgversion}-devel -Requires: poppler-utils GraphicsMagick libreoffice file net-tools +Requires: poppler-utils GraphicsMagick libreoffice Requires: nautilus-python +Requires: python%{python3_pkgversion} >= 3.7 +Requires: python%{python3_pkgversion}-pillow +Requires: python%{python3_pkgversion}-click +Requires: python%{python3_pkgversion}-tqdm +Requires: python%{python3_pkgversion}-magic Source0: %{name}-%{version}.tar.gz @@ -56,13 +63,15 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) /etc/qubes-rpc/qubes.PdfConvert -/usr/lib/qubes/qpdf-convert-client /usr/lib/qubes/qpdf-convert-server /usr/lib/qubes/qvm-convert-pdf.gnome /usr/bin/qvm-convert-pdf /usr/share/nautilus-python/extensions/qvm_convert_pdf_nautilus.py* /usr/share/kde4/services/qvm-convert-pdf.desktop %{_mandir}/man1/qvm-convert-pdf.1* +%dir %{python3_sitelib}/qubespdfconverter-*.egg-info +%{python3_sitelib}/qubespdfconverter-*.egg-info/* +%{python3_sitelib}/qubespdfconverter %changelog @CHANGELOG@ diff --git a/setup.py b/setup.py index f2766c0..6733429 100644 --- a/setup.py +++ b/setup.py @@ -22,14 +22,55 @@ # USA. 
# +import sys +import os +import setuptools.command.install from setuptools import setup +if sys.version_info[0:2] < (3, 7): + # on older python install just tests (dom0 package) + packages = ['qubespdfconverter.tests'] +else: + packages = ['qubespdfconverter', 'qubespdfconverter.tests'] + +# create simple scripts that run much faster than "console entry points" +class CustomInstall(setuptools.command.install.install): + def run(self): + super().run() + if 'qubespdfconverter' not in packages: + return + scripts = [ + ('usr/lib/qubes/qpdf-convert-server', 'qubespdfconverter.server'), + ('usr/bin/qvm-convert-pdf', 'qubespdfconverter.client'), + ] + for file, pkg in scripts: + path = os.path.join(self.root, file) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + f.write( +"""#!/usr/bin/python3 +from {} import main +import sys +if __name__ == '__main__': + sys.exit(main()) +""".format(pkg)) + + os.chmod(path, 0o755) + setup( name='qubespdfconverter', version=open('version').read().strip(), - packages=['qubespdfconverter'], + packages=packages, + install_requires=[ + 'Click', + 'Pillow', + 'tqdm' + ], entry_points={ 'qubes.tests.extra.for_template': 'qubespdfconverter = qubespdfconverter.tests:list_tests', - } + }, + cmdclass={ + 'install': CustomInstall + }, ) diff --git a/tests/all b/tests/all new file mode 100755 index 0000000..0f28061 --- /dev/null +++ b/tests/all @@ -0,0 +1,27 @@ +#!/bin/bash +relative_directory=$(dirname "$0") +password=toor + +rm -f "$relative_directory"/files_success/*.trusted.pdf + +for file in "$relative_directory"/files_success/*; do + echo "Converting $file" + sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_success/ --password "$password" "$file" + error_code=$? + if [ $error_code -ne 0 ]; then + echo "Conversion failed!" 
+ exit 1 + fi +done + +for file in "$relative_directory"/files_error/*; do + echo "Converting $file" + sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_errors/ --password "$password" "$file" + error_code=$? + if [ $error_code -eq 0 ]; then + echo "The conversion should be failing" + exit 1 + fi +done + +echo "Everything seems to be OK" diff --git a/dev_tools/files_error/mgs.mp4 b/tests/files_error/mgs.mp4 similarity index 100% rename from dev_tools/files_error/mgs.mp4 rename to tests/files_error/mgs.mp4 diff --git a/dev_tools/files_success/arch-spec-0.3.pdf b/tests/files_success/arch-spec-0.3.pdf similarity index 100% rename from dev_tools/files_success/arch-spec-0.3.pdf rename to tests/files_success/arch-spec-0.3.pdf diff --git a/dev_tools/files_success/csv.csv b/tests/files_success/csv.csv similarity index 100% rename from dev_tools/files_success/csv.csv rename to tests/files_success/csv.csv diff --git a/dev_tools/files_success/doc.doc b/tests/files_success/doc.doc similarity index 100% rename from dev_tools/files_success/doc.doc rename to tests/files_success/doc.doc diff --git a/dev_tools/files_success/docx.docx b/tests/files_success/docx.docx similarity index 100% rename from dev_tools/files_success/docx.docx rename to tests/files_success/docx.docx diff --git a/dev_tools/files_success/docx_openxml.docx b/tests/files_success/docx_openxml.docx similarity index 100% rename from dev_tools/files_success/docx_openxml.docx rename to tests/files_success/docx_openxml.docx diff --git a/dev_tools/files_success/docx_password.docx b/tests/files_success/docx_password.docx similarity index 100% rename from dev_tools/files_success/docx_password.docx rename to tests/files_success/docx_password.docx diff --git a/dev_tools/files_success/mml.mml b/tests/files_success/mml.mml similarity index 100% rename from 
dev_tools/files_success/mml.mml rename to tests/files_success/mml.mml diff --git a/dev_tools/files_success/odf.odf b/tests/files_success/odf.odf similarity index 100% rename from dev_tools/files_success/odf.odf rename to tests/files_success/odf.odf diff --git a/dev_tools/files_success/odg.odg b/tests/files_success/odg.odg similarity index 100% rename from dev_tools/files_success/odg.odg rename to tests/files_success/odg.odg diff --git a/dev_tools/files_success/odg_password.odg b/tests/files_success/odg_password.odg similarity index 100% rename from dev_tools/files_success/odg_password.odg rename to tests/files_success/odg_password.odg diff --git a/dev_tools/files_success/odp.odp b/tests/files_success/odp.odp similarity index 100% rename from dev_tools/files_success/odp.odp rename to tests/files_success/odp.odp diff --git a/dev_tools/files_success/odp_password.odp b/tests/files_success/odp_password.odp similarity index 100% rename from dev_tools/files_success/odp_password.odp rename to tests/files_success/odp_password.odp diff --git a/dev_tools/files_success/ods.ods b/tests/files_success/ods.ods similarity index 100% rename from dev_tools/files_success/ods.ods rename to tests/files_success/ods.ods diff --git a/dev_tools/files_success/odt.odt b/tests/files_success/odt.odt similarity index 100% rename from dev_tools/files_success/odt.odt rename to tests/files_success/odt.odt diff --git a/dev_tools/files_success/odt_password.odt b/tests/files_success/odt_password.odt similarity index 100% rename from dev_tools/files_success/odt_password.odt rename to tests/files_success/odt_password.odt diff --git a/dev_tools/files_success/pdf_password.pdf b/tests/files_success/pdf_password.pdf similarity index 100% rename from dev_tools/files_success/pdf_password.pdf rename to tests/files_success/pdf_password.pdf diff --git a/dev_tools/files_success/ppt.ppt b/tests/files_success/ppt.ppt similarity index 100% rename from dev_tools/files_success/ppt.ppt rename to 
tests/files_success/ppt.ppt diff --git a/dev_tools/files_success/pptx.pptx b/tests/files_success/pptx.pptx similarity index 100% rename from dev_tools/files_success/pptx.pptx rename to tests/files_success/pptx.pptx diff --git a/dev_tools/files_success/shinra.png b/tests/files_success/shinra.png similarity index 100% rename from dev_tools/files_success/shinra.png rename to tests/files_success/shinra.png diff --git a/dev_tools/files_success/xls.xls b/tests/files_success/xls.xls similarity index 100% rename from dev_tools/files_success/xls.xls rename to tests/files_success/xls.xls diff --git a/dev_tools/files_success/xlsx.xlsx b/tests/files_success/xlsx.xlsx similarity index 100% rename from dev_tools/files_success/xlsx.xlsx rename to tests/files_success/xlsx.xlsx diff --git a/version b/version index 4a36342..ebf14b4 100644 --- a/version +++ b/version @@ -1 +1 @@ -3.0.0 +2.1.8