Skip to content

Commit

Permalink
isolation_provider: Always terminate spawned process
Browse files Browse the repository at this point in the history
Previously, we always assumed that the spawned process would quit
within 3 seconds. This was an arbitrary call, and did not work in
practice.

We can improve our standing here by doing the following:

1. Make `Popen.wait()` calls take a generous amount of time (since they
   are usually on the sad path), and handle any timeout errors that they
   throw. This way, a slow conversion process cleanup does not take too
   much of our users time, nor is it reported as an error.
2. Always make sure that once the conversion of doc to pixels is over,
   the corresponding process will finish within a reasonable amount of
   time as well.

Fixes #749
apyrgio committed Apr 24, 2024
1 parent cd4cbdb commit f57d2f7
Showing 1 changed file with 67 additions and 9 deletions.
76 changes: 67 additions & 9 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import contextlib
import logging
import os
import subprocess
import sys
import tempfile
from abc import ABC, abstractmethod
from pathlib import Path
from typing import IO, Callable, Optional
from typing import IO, Callable, Iterator, Optional

from colorama import Fore, Style

@@ -23,6 +24,9 @@
PIXELS_TO_PDF_LOG_END = "----- PIXELS TO PDF LOG END -----"

TIMEOUT_EXCEPTION = 15
TIMEOUT_GRACE = 15
TIMEOUT_FORCE = 5


def read_bytes(f: IO[bytes], size: int, exact: bool = True) -> bytes:
"""Read bytes from a file-like object."""
@@ -70,20 +74,14 @@ def convert(
self.progress_callback = progress_callback
document.mark_as_converting()
try:
conversion_proc = self.start_doc_to_pixels_proc(document)
with tempfile.TemporaryDirectory() as t:
Path(f"{t}/pixels").mkdir()
self.doc_to_pixels(document, t, conversion_proc)
conversion_proc.wait(3)
# TODO: validate convert to pixels output
with self.doc_to_pixels_proc(document) as conversion_proc:
self.doc_to_pixels(document, t, conversion_proc)
self.pixels_to_pdf(document, t, ocr_lang)
document.mark_as_safe()
if document.archive_after_conversion:
document.archive()
except errors.ConverterProcException as e:
exception = self.get_proc_exception(conversion_proc)
self.print_progress(document, True, str(exception), 0)
document.mark_as_failed()
except errors.ConversionException as e:
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
@@ -217,6 +215,66 @@ def terminate_doc_to_pixels_proc(
"""Terminate gracefully the process started for the doc-to-pixels phase."""
pass

def ensure_stop_doc_to_pixels_proc(
self,
document: Document,
p: subprocess.Popen,
timeout_grace: int = TIMEOUT_GRACE,
timeout_force: int = TIMEOUT_FORCE,
) -> None:
"""Stop the conversion process, or ensure it has exited.
This method should be called when we want to verify that the doc-to-pixels
process has exited, or terminate it ourselves. The termination should happen as
gracefully as possible, and we should not block indefinitely until the process
has exited.
"""
# Check if the process completed.
ret = p.poll()
if ret is not None:
return

# At this point, the process is still running. This may be benign, as we haven't
# waited for it yet. Terminate it gracefully.
self.terminate_doc_to_pixels_proc(document, p)
try:
p.wait(timeout_grace)
except subprocess.TimeoutExpired:
log.warning(
f"Conversion process did not terminate gracefully after {timeout_grace}"
" seconds. Killing it forcefully..."
)

# Forcefully kill the running process.
p.kill()
try:
p.wait(timeout_force)
except subprocess.TimeoutExpired:
log.warning(
"Conversion process did not terminate forcefully after"
f" {timeout_force} seconds. Resources may linger..."
)

@contextlib.contextmanager
def doc_to_pixels_proc(
self,
document: Document,
timeout_exception: int = TIMEOUT_EXCEPTION,
timeout_grace: int = TIMEOUT_GRACE,
timeout_force: int = TIMEOUT_FORCE,
) -> Iterator[subprocess.Popen]:
"""Start a conversion process, pass it to the caller, and then clean it up."""
p = self.start_doc_to_pixels_proc(document)
try:
yield p
except errors.ConverterProcException as e:
exception = self.get_proc_exception(p, timeout_exception)
raise exception from e
finally:
self.ensure_stop_doc_to_pixels_proc(
document, p, timeout_grace=timeout_grace, timeout_force=timeout_force
)


# From global_common:

0 comments on commit f57d2f7

Please sign in to comment.