Skip to content

Commit

Permalink
Add page limit of 10000
Browse files Browse the repository at this point in the history
Theoretically the max pages would be 65536 (2byte unsigned int.
However this limit is much higher than practical documents have
and larger ones can lead to unforseen problems, for example RAM
limitations.

We thus opted to use a lower limit of 10K. The limit must be
detected client-side, given that the server is distrusted. However
we also check it in the server, just as a fail-early mechanism.
  • Loading branch information
deeplow committed Sep 28, 2023
1 parent afba362 commit 54b8ffb
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 6 deletions.
3 changes: 3 additions & 0 deletions dangerzone/conversion/doc_to_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ async def convert(self) -> None:
else:
raise errors.NoPageCountException()

if num_pages > errors.MAX_PAGES:
raise errors.MaxPagesException()

# Get a more precise timeout, based on the number of pages
timeout = self.calculate_timeout(size, num_pages)

Expand Down
9 changes: 9 additions & 0 deletions dangerzone/conversion/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# XXX: errors start at 128 for conversion-related issues
ERROR_SHIFT = 128
MAX_PAGES = 10000


class ConversionException(Exception):
Expand Down Expand Up @@ -53,6 +54,14 @@ class NoPageCountException(PagesException):
error_message = "Number of pages could not be extracted from PDF"


class MaxPagesException(PagesException):
"""Max number of pages enforced by the client (to fail early) but also the
server, which distrusts the client"""

error_code = ERROR_SHIFT + 42
error_message = f"Number of pages exceeds maximum ({MAX_PAGES})"


class PDFtoPPMException(ConversionException):
error_code = ERROR_SHIFT + 50
error_message = "Error converting PDF to Pixels (pdftoppm)"
Expand Down
5 changes: 2 additions & 3 deletions dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,8 @@ def __convert(
os.set_blocking(self.proc.stdout.fileno(), False)

n_pages = read_int(self.proc.stdout, timeout)
if n_pages == 0:
# FIXME: Fail loudly in that case
return False
if n_pages == 0 or n_pages > errors.MAX_PAGES:
raise errors.MaxPagesException()
if ocr_lang:
percentage_per_page = 50.0 / n_pages
else:
Expand Down
19 changes: 19 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
import zipfile
from pathlib import Path
from typing import Callable, List

Expand All @@ -13,7 +14,11 @@
BASIC_SAMPLE_PDF = "sample-pdf.pdf"
BASIC_SAMPLE_DOC = "sample-doc.doc"
SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external"
SAMPLE_COMPRESSED_DIRECTORY = "test_docs_compressed"

test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY)
test_docs_compressed_dir = Path(__file__).parent.joinpath(SAMPLE_COMPRESSED_DIRECTORY)

test_docs = [
p
for p in test_docs_dir.rglob("*")
Expand Down Expand Up @@ -73,6 +78,20 @@ def unreadable_pdf(tmp_path: Path) -> str:
return str(file_path)


@pytest.fixture
def pdf_11k_pages(tmp_path: Path) -> str:
"""11K page document with pages of 1x1 px. Generated with the command:
gs -sDEVICE=pdfwrite -o sample-11k-pages.pdf -dDEVICEWIDTHPOINTS=1 -dDEVICEHEIGHTPOINTS=1 -c 11000 {showpage} repeat
"""

filename = "sample-11k-pages.pdf"
zip_path = test_docs_compressed_dir / f"{filename}.zip"
with zipfile.ZipFile(zip_path, "r") as zip_file:
zip_file.extractall(tmp_path)
return str(tmp_path / filename)


@pytest.fixture
def uncommon_text() -> str:
"""Craft a string with Unicode characters that are considered not common.
Expand Down
15 changes: 14 additions & 1 deletion tests/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
from colorama import Style
from pytest_mock import MockerFixture

from dangerzone.conversion import errors
from dangerzone.document import Document
from dangerzone.isolation_provider import base

from .. import sanitized_text, uncommon_text
from .. import pdf_11k_pages, sanitized_text, uncommon_text


class IsolationProviderTest:
Expand Down Expand Up @@ -48,3 +49,15 @@ def test_print_progress(
else:
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
log_error_spy.assert_not_called()

def test_max_pages_received(
self,
pdf_11k_pages: str,
provider: base.IsolationProvider,
mocker: MockerFixture,
) -> None:
provider.progress_callback = mocker.MagicMock()
doc = Document(pdf_11k_pages)
with pytest.raises(errors.MaxPagesException):
success = provider._convert(doc, ocr_lang=None)
assert not success
3 changes: 2 additions & 1 deletion tests/isolation_provider/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from dangerzone.document import Document
from dangerzone.isolation_provider.container import Container

from .. import sanitized_text, uncommon_text
# XXX Fixtures used in abstract Test class need to be imported regardless
from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .base import IsolationProviderTest


Expand Down
3 changes: 2 additions & 1 deletion tests/isolation_provider/test_qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from dangerzone.isolation_provider.qubes import Qubes

from .. import sanitized_text, uncommon_text
# XXX Fixtures used in abstract Test class need to be imported regardless
from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .base import IsolationProviderTest


Expand Down
Binary file not shown.

0 comments on commit 54b8ffb

Please sign in to comment.