From c3d75058872fd578ba76c7b55ec01ed83cb1da31 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Wed, 13 Sep 2023 15:09:28 +0300 Subject: [PATCH] qubes: Add client-side timeouts Extend the client-side capabilities of the Qubes isolation provider, by adding client-side timeout logic. This implementation brings the same logic that we used server-side to the client, by taking into account the original file size and the number of pages that the server returns. Since the code does not have the exact same insight as the server has, the calculated timeouts are in two places: 1. The timeout for getting the number of pages. This timeout takes into account: * the disposable qube startup time, and * the time it takes to convert a file type to PDF 2. The total timeout for converting the PDF into pixels, in the same way that we do it on the server-side. Besides these changes, we also ensure that partial reads (e.g., due to EOF) are detected (see exact=... argument) Some things that are not resolved in this commit are: * We have both client-side and server-side timeouts for the first phase of the conversion. Once containers can stream data back to the application (see #443), these server-side timeouts can be removed. * We do not show a proper error message when a timeout occurs. This will be part of the error handling PR (see #430) Fixes #446 Refs #443 Refs #430 --- dangerzone/isolation_provider/qubes.py | 67 +++++++++++++++++--------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index b9d9d8214..7b845b85a 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -13,10 +13,16 @@ from pathlib import Path from typing import IO, Callable, Optional -from ..conversion.common import running_on_qubes +from ..conversion.common import calculate_timeout, running_on_qubes from ..conversion.pixels_to_pdf import PixelsToPDF from ..document import Document -from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir +from ..util import ( + Stopwatch, + get_resource_path, + get_subprocess_startupinfo, + get_tmp_dir, + nonblocking_read, +) from .base import MAX_CONVERSION_LOG_CHARS, IsolationProvider log = logging.getLogger(__name__) @@ -26,26 +32,29 @@ "/tmp/safe-output-compressed.pdf" ) +# The maximum time a qube takes to start up. +STARTUP_TIME_SECONDS = 5 * 60 # 5 minutes + -def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes: - """Read bytes from stdout.""" - return p.stdout.read(buff_size) # type: ignore [union-attr] +def read_bytes(f: IO[bytes], size: int, timeout: float, exact: bool = True) -> bytes: + """Read bytes from a file-like object.""" + buf = nonblocking_read(f, size, timeout) + if exact and len(buf) != size: + raise ValueError("Did not receive exact number of bytes") + return buf -def read_int(p: subprocess.Popen) -> int: - """Read 2 bytes from stdout, and decode them as int.""" - untrusted_int = p.stdout.read(2) # type: ignore [union-attr] +def read_int(f: IO[bytes], timeout: float) -> int: + """Read 2 bytes from a file-like object, and decode them as int.""" + untrusted_int = read_bytes(f, 2, timeout) return int.from_bytes(untrusted_int, signed=False) -def read_debug_text(p: subprocess.Popen) -> str: +def read_debug_text(f: IO[bytes], size: int) -> str: """Read arbitrarily long text (for debug purposes)""" - if p.stderr: - untrusted_text = p.stderr.read(MAX_CONVERSION_LOG_CHARS) - p.stderr.close() - return untrusted_text.decode("ascii", errors="replace") - else: - return "" + timeout = calculate_timeout(size) + untrusted_text = read_bytes(f, size, timeout, exact=False) + return untrusted_text.decode("ascii", errors="replace") class Qubes(IsolationProvider): @@ -101,7 +110,13 @@ def _convert( stderr=subprocess.DEVNULL, ) - n_pages = read_int(p) + # Get file size (in MiB) + size = os.path.getsize(document.input_filename) / 1024**2 + timeout = calculate_timeout(size) + STARTUP_TIME_SECONDS + + assert p.stdout is not None + os.set_blocking(p.stdout.fileno(), False) + n_pages = read_int(p.stdout, timeout) if n_pages == 0: # FIXME: Fail loudly in that case return False @@ -109,14 +124,19 @@ def _convert( percentage_per_page = 50.0 / n_pages else: percentage_per_page = 100.0 / n_pages + + timeout = calculate_timeout(size, n_pages) + sw = Stopwatch(timeout) + sw.start() for page in range(1, n_pages + 1): # TODO handle too width > MAX_PAGE_WIDTH # TODO handle too big height > MAX_PAGE_HEIGHT - - width = read_int(p) - height = read_int(p) + width = read_int(p.stdout, timeout=sw.remaining) + height = read_int(p.stdout, timeout=sw.remaining) untrusted_pixels = read_bytes( - p, width * height * 3 + p.stdout, + width * height * 3, + timeout=sw.remaining, ) # three color channels # Wrapper code @@ -133,14 +153,17 @@ def _convert( self.print_progress_trusted(document, False, text, percentage) # Ensure nothing else is read after all bitmaps are obtained - p.stdout.close() # type: ignore [union-attr] + p.stdout.close() # TODO handle leftover code input text = "Converted document to pixels" self.print_progress_trusted(document, False, text, percentage) if getattr(sys, "dangerzone_dev", False): - untrusted_log = read_debug_text(p) + assert p.stderr is not None + os.set_blocking(p.stderr.fileno(), False) + untrusted_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS) + p.stderr.close() log.info( f"Conversion output (doc to pixels)\n{self.sanitize_conversion_str(untrusted_log)}" )