Skip to content

Commit

Permalink
fixes related to the code review of pull request QubesOS#9
Browse files Browse the repository at this point in the history
  • Loading branch information
neowutran committed Jul 14, 2020
1 parent 69ddb85 commit d2880ff
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 135 deletions.
11 changes: 2 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,12 @@ CONFIGURATION
To use a custom DisposableVM instead of the default one:

Let’s assume that this custom DisposableVM is called "web".
In your client Qube, specify this:
In dom0, add a new line to "/etc/qubes-rpc/policy/qubes.PdfConvert":

```bash
echo "web" > /rw/config/PdfConvert_dispvm
YOUR_CLIENT_VM_NAME @dispvm allow,target=@dispvm:web
```

Then in dom0:

```bash
echo "@anyvm @dispvm:web allow" > /etc/qubes-rpc/policy/qubes.PdfConvert
```


AUTHORS
--------------

Expand Down
3 changes: 0 additions & 3 deletions archlinux/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ depends=(libreoffice graphicsmagick zenity poppler python-nautilus python-click
build() {
ln -s "$srcdir"/../ "$srcdir/src"
}
check(){
src/tests/all
}
package() {
cd src
make install-vm DESTDIR="$pkgdir/"
Expand Down
2 changes: 0 additions & 2 deletions debian/tests/control

This file was deleted.

36 changes: 2 additions & 34 deletions qubespdfconverter/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,7 @@
import tqdm
import click

def get_dispvm_template():
    """Return the custom DisposableVM template name, if one is configured.

    The name is taken from the first line of
    ``/rw/config/PdfConvert_dispvm`` with its line terminator removed.

    Returns:
        The first line of the configuration file as a string, or ``None``
        when the file cannot be read (missing, unreadable, not valid text)
        or is empty.
    """
    try:
        # The context manager guarantees the handle is closed even if
        # reading fails part-way through.
        with open("/rw/config/PdfConvert_dispvm", "r") as config_file:
            first_line = config_file.readline()
    except (OSError, UnicodeDecodeError):
        # Best-effort lookup: an absent or unreadable configuration simply
        # means "no custom template", without masking unrelated errors
        # such as KeyboardInterrupt.
        return None

    # splitlines() drops the trailing newline; an empty file yields no
    # lines at all, which is reported as "not configured".
    lines = first_line.splitlines()
    return lines[0] if lines else None

CLIENT_VM_CMD = ["/usr/bin/qrexec-client-vm", "@dispvm", "qubes.PdfConvert"]
DISPVM_TEMPLATE = get_dispvm_template()
if DISPVM_TEMPLATE:
CLIENT_VM_CMD = ["/usr/bin/qrexec-client-vm", "@dispvm:"+DISPVM_TEMPLATE, "qubes.PdfConvert"]

MAX_PAGES = 10000
MAX_IMG_WIDTH = 10000
Expand Down Expand Up @@ -460,15 +450,9 @@ def __init__(self, path, pos):
self.base = None
self.proc = None
self.pdf = None
self.password = None
self.gui = False


async def run(self, archive, depth, in_place, password, gui):
self.password = "" if password is None else password
self.gui = gui


async def run(self, archive, depth, in_place):
self.proc = await asyncio.create_subprocess_exec(
*CLIENT_VM_CMD,
stdin=asyncio.subprocess.PIPE,
Expand Down Expand Up @@ -565,10 +549,7 @@ async def _send(self):
None,
self.path.read_bytes
)
gui_msg = "1" if self.gui else "0"
try:
await send(self.proc, self.password + "\n")
await send(self.proc, gui_msg + "\n")
await send(self.proc, data)
except BrokenPipeError as e:
raise QrexecError("Failed to send PDF") from e
Expand Down Expand Up @@ -605,9 +586,7 @@ async def run(params):
for job in jobs:
tasks.append(asyncio.create_task(job.run(params["archive"],
params["batch"],
params["in_place"],
params["password"],
params["gui"])))
params["in_place"])))

asyncio.get_running_loop().add_signal_handler(
signal.SIGINT,
Expand Down Expand Up @@ -669,17 +648,6 @@ async def run(params):
is_flag=True,
help="Replace original files instead of archiving them"
)
@click.option(
"-g",
"--gui",
is_flag=True,
help="Allow GUI popup to be displayed"
)
@click.option(
"-p",
"--password",
help="Password for reading the file."
)
@click.argument(
"files",
type=Path,
Expand Down
158 changes: 74 additions & 84 deletions qubespdfconverter/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,8 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

###########################
# The project "Dangerzone" reused the idea of this script based on:
# https://blog.invisiblethings.org/2013/02/21/converting-untrusted-pdfs-into-trusted.html
#
# A similar project exists:
# - https://github.com/firstlookmedia/dangerzone-converter
# - https://dangerzone.rocks/
# - https://github.com/firstlookmedia/dangerzone
#
# Dangerzone tries to export the idea to non-Qubes-based systems, and tries to improve it.
# Both projects can improve the other.
###########################

Expand Down Expand Up @@ -160,9 +154,9 @@ async def create_irep(self, password):
cmd = [
"pdftocairo",
"-opw",
str(password),
password,
"-upw",
str(password),
password,
str(self.path),
"-png",
"-f",
Expand All @@ -172,7 +166,6 @@ async def create_irep(self, password):
"-singlefile",
str(Path(self.initial.parent, self.initial.stem))
]

proc = await asyncio.create_subprocess_exec(*cmd)
try:
await wait_proc(proc, cmd)
Expand All @@ -181,7 +174,7 @@ async def create_irep(self, password):
"gm",
"convert",
str(self.path),
"png:"+str(self.initial)
f"png:{self.initial}"
]
proc = await asyncio.create_subprocess_exec(*cmd)
await wait_proc(proc, cmd)
Expand Down Expand Up @@ -211,29 +204,29 @@ class BatchEntry:

class BaseFile:
"""Unsanitized file"""
def __init__(self, path, password, gui):
def __init__(self, path):
self.path = path
self.password = password
self.gui = gui
self.pagenums = 0
self.batch = None

self.password = ""

def _read_password(self, password_success):
if not password_success:
if self.gui:
cmd = ["zenity", "--title", "File protected by password", "--password"]
self.password = subprocess.run(cmd, capture_output=True, check=True)\
.stdout.split(b"\n")[0]
else:
# TODO doesn't correctly close/kill the client
raise ValueError("Incorrect password")
cmd = ["zenity", "--title", "File protected by password", "--password"]
self.password = subprocess.run(cmd, capture_output=True, check=True)\
.stdout.split(b"\n")[0]


def _decrypt(self):
"""
Try to remove the password of a libreoffice-compatible file,
and store the resulting file in INITIAL_NAME.nopassword
and store the resulting file in INITIAL_NAME.nopassword.
The steps are:
- Connect to a libreoffice API server, listening on localhost on port 2202
- Try to load a document with additional properties:
- "Hidden" to not load any libreoffice GUI
- "Password" to automatically try to decrypt the document
- Store the document without additional properties [this removes the password]
"""

src = "file://"+str(self.path)
Expand Down Expand Up @@ -273,60 +266,64 @@ async def sanitize(self):
password_success = False
mimetype = magic.detect_from_filename(str(self.path)).mime_type
if mimetype.startswith("video/") or mimetype.startswith("audio/"):
raise ValueError("Cannot convert media to PDF")
raise ValueError
if mimetype.startswith("image/"):
pass
elif mimetype == "application/pdf":
while not password_success:
cmd = ["pdfinfo", "-opw", self.password, "-upw", self.password, str(self.path)]
password_success = not b"Incorrect password" in \
subprocess.run(cmd, capture_output=True, check=True).stderr
self._read_password(password_success)
self.pagenums = 1
else:
# Performance could be improved by only starting
# libreoffice when needed (i.e. when the file needs to be decrypted),
# but the code is simpler this way.
if mimetype == "application/pdf":
while not password_success:
cmd = ["pdfinfo", "-opw", self.password, "-upw", self.password, str(self.path)]
try:
password_success = not b"Incorrect password" in subprocess.\
run(cmd, capture_output=True, check=True).stderr
except subprocess.CalledProcessError:
password_success = False
self._read_password(password_success)
else:
# Performance could be improved by only starting
# libreoffice when needed (i.e. when the file needs to be decrypted),
# but the code is simpler this way.

# Launch libreoffice server
cmd = [
"libreoffice",
"--accept=socket,host=localhost,port=2202;urp;",
"--norestore",
"--nologo",
"--nodefault"
]
libreoffice_process = subprocess.Popen(cmd, stderr=open(os.devnull, 'wb'))

# Wait until libreoffice server is ready
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
while sock.connect_ex(('127.0.0.1', 2202)) != 0:
time.sleep(1)

# Remove password from file using libreoffice API
while not password_success:
try:
self._decrypt()
password_success = True
except:
self._read_password(False)

libreoffice_process.terminate()
cmd = [
"libreoffice",
"--convert-to",
"pdf",
str(self.path) + ".nopassword",
"--outdir",
self.path.parents[0]
]
subprocess.run(cmd, capture_output=True, check=True)
os.rename(str(self.path) + ".pdf", str(self.path))

self.pagenums = self._pagenums()

# Launch libreoffice server
cmd = [
"libreoffice",
"--accept=socket,host=localhost,port=2202;urp;",
"--norestore",
"--nologo",
"--nodefault"
]
libreoffice_process = subprocess.Popen(cmd, stderr=open(os.devnull, 'wb'))

# Wait until libreoffice server is ready
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
while sock.connect_ex(('127.0.0.1', 2202)) != 0:
time.sleep(1)

# Remove password from file using libreoffice API
while not password_success:
try:
self._decrypt()
password_success = True
except:
self._read_password(False)

libreoffice_process.terminate()
cmd = [
"libreoffice",
"--convert-to",
"pdf",
str(self.path) + ".nopassword",
"--outdir",
self.path.parents[0]
]
subprocess.run(cmd, capture_output=True, check=True)
os.rename(str(self.path) + ".pdf", str(self.path))
self.pagenums = self._pagenums()
self.batch = asyncio.Queue(self.pagenums)

send(self.pagenums)

publish_task = asyncio.create_task(self._publish())
consume_task = asyncio.create_task(self._consume())

Expand All @@ -346,16 +343,14 @@ async def sanitize(self):
def _pagenums(self):
"""Return the number of pages in the suspect file"""
cmd = ["pdfinfo", "-opw", self.password, "-upw", self.password, str(self.path)]
try:
output = subprocess.run(cmd, capture_output=True, check=True)
except subprocess.CalledProcessError:
return 1
output = subprocess.run(cmd, capture_output=True, check=True)
pages = 0

for line in output.stdout.decode().splitlines():
if "Pages:" in line:
return int(line.split(":")[1])
pages = int(line.split(":")[1])

return 1
return pages


async def _publish(self):
Expand Down Expand Up @@ -409,16 +404,11 @@ def main():
data = recv_b()
except EOFError:
sys.exit(1)
password_data = data.partition(b"\n")
password = password_data[0].decode("utf-8")
gui_data = password_data[2].partition(b"\n")
gui = gui_data[0] == b"1"
data = gui_data[2]

with TemporaryDirectory(prefix="qvm-sanitize") as tmpdir:
pdf_path = Path(tmpdir, "original")
pdf_path.write_bytes(data)
base = BaseFile(pdf_path, password, gui)
base = BaseFile(pdf_path)

loop = asyncio.get_event_loop()
try:
Expand Down
7 changes: 4 additions & 3 deletions tests/all
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#!/bin/bash

# The password is "toor"
relative_directory=$(dirname "$0")
password=toor

rm -f "$relative_directory"/files_success/*.trusted.pdf

for file in "$relative_directory"/files_success/*; do
echo "Converting $file"
sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_success/ --password "$password" "$file"
sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_success/ "$file"
error_code=$?
if [ $error_code -ne 0 ]; then
echo "Conversion failed!"
Expand All @@ -16,7 +17,7 @@ done

for file in "$relative_directory"/files_error/*; do
echo "Converting $file"
sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_errors/ --password "$password" "$file"
sed 's#CLIENT_VM_CMD\s*=.*$#CLIENT_VM_CMD = ["'"$relative_directory"'/../qubespdfconverter/server.py"]#g' "$relative_directory"/../qubespdfconverter/client.py | python3 - -a "$relative_directory"/files_errors/ "$file"
error_code=$?
if [ $error_code -eq 0 ]; then
echo "The conversion should be failing"
Expand Down

0 comments on commit d2880ff

Please sign in to comment.