Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable container logging and add large test logic and documentation #386

Merged
merged 14 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "tests/test_docs_large"]
path = tests/test_docs_large
url = https://github.com/freedomofpress/dangerzone-test-set
34 changes: 28 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
LARGE_TEST_REPO_DIR:=tests/test_docs_large
GIT_DESC=$$(git describe)
JUNIT_FLAGS := --capture=sys -o junit_logging=all

.PHONY: lint-black
lint-black: ## check python source code formatting issues, with black
black --check --diff --exclude dev_scripts/envs ./
black --check --diff --exclude dev_scripts/envs --exclude $(LARGE_TEST_REPO_DIR) ./

.PHONY: lint-black-apply
lint-black-apply: ## apply black's source code formatting suggestions
black --exclude dev_scripts/envs ./
black --exclude dev_scripts/envs --exclude $(LARGE_TEST_REPO_DIR) ./

.PHONY: lint-isort
lint-isort: ## check imports are organized, with isort
isort --check-only --skip dev_scripts/envs ./
isort --check-only --skip dev_scripts/envs --skip $(LARGE_TEST_REPO_DIR) ./

.PHONY: lint-isort-apply
lint-isort-apply: ## apply isort's imports organization suggestions
isort --skip dev_scripts/envs ./
isort --skip dev_scripts/envs --skip $(LARGE_TEST_REPO_DIR) ./

MYPY_ARGS := --ignore-missing-imports \
--disallow-incomplete-defs \
--disallow-untyped-defs \
--show-error-codes \
--warn-unreachable \
--warn-unused-ignores
--warn-unused-ignores \
--exclude $(LARGE_TEST_REPO_DIR)/*.py

mypy-host:
mypy $(MYPY_ARGS) dangerzone
Expand All @@ -41,8 +46,25 @@ test:
# shared state.
# See more in https://github.com/freedomofpress/dangerzone/issues/493
pytest --co -q tests/gui | grep -v ' collected' | xargs -n 1 pytest -v
pytest -v --cov --ignore dev_scripts --ignore tests/gui
pytest -v --cov --ignore dev_scripts --ignore tests/gui --ignore tests/test_large_set.py


.PHONY: test-large-requirements
test-large-requirements:
@git-lfs --version || (echo "ERROR: you need to install 'git-lfs'" && false)
@xmllint --version || (echo "ERROR: you need to install 'xmllint'" && false)

test-large-init: test-large-requirements
@echo "initializing 'test_docs_large' submodule"
git submodule init $(LARGE_TEST_REPO_DIR)
git submodule update $(LARGE_TEST_REPO_DIR)
cd $(LARGE_TEST_REPO_DIR) && $(MAKE) clone-docs

# JUnit XML report produced by the large test run, named after `git describe`.
TEST_LARGE_RESULTS:=$(LARGE_TEST_REPO_DIR)/results/junit/commit_$(GIT_DESC).junit.xml
# NOTE(review): the last recipe line resolves report.py *under* the .xml file
# path above, which cannot be a directory. The script presumably lives in
# $(LARGE_TEST_REPO_DIR) -- confirm against the test-set repo and adjust.
.PHONY: test-large
test-large: test-large-init ## Run large test set
	python -m pytest --tb=no tests/test_large_set.py::TestLargeSet -v $(JUNIT_FLAGS) --junitxml=$(TEST_LARGE_RESULTS)
	python $(TEST_LARGE_RESULTS)/report.py $(TEST_LARGE_RESULTS)

# Makefile self-help borrowed from the securedrop-client project
# Explanation of the below shell command, should it ever break.
Expand Down
8 changes: 8 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

This section documents the release process. Unless you're a dangerzone developer making a release, you'll probably never need to follow it.

## Large document testing

In parallel with the QA process, the release candidate should be put through the large document tests on a dedicated machine, where they can run overnight.

Follow the instructions in `docs/developer/TESTING.md` to run the tests.

These tests will identify any regressions or improvements in document coverage.

## QA

To ensure that new releases do not introduce regressions, and support existing
Expand Down
155 changes: 81 additions & 74 deletions dangerzone/conversion/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,84 +22,91 @@ def running_on_qubes() -> bool:
return os.path.exists("/usr/share/qubes/marker-vm")


async def read_stream(
    sr: asyncio.StreamReader, callback: Optional[Callable] = None
) -> bytes:
    """Consume a byte stream line-by-line until EOF.

    Every line read from the stream is passed to ``callback`` (when one is
    given) and appended to the returned buffer.

    Lines are kept as bytes, since we can't assume that all command output is
    UTF-8 encoded. Higher level code is advised to decode the output to
    Unicode, if it knows the encoding.

    :param sr: the stream to drain
    :param callback: optional callable invoked with each line (as bytes)
    :returns: everything that was read from the stream, concatenated
    """
    lines = []
    while True:
        line = await sr.readline()
        # readline() returns b"" only once the stream is exhausted. Checking
        # sr.at_eof() here instead would discard the last line whenever EOF was
        # already buffered behind it, as well as any unterminated final chunk.
        if not line:
            break
        if callback is not None:
            callback(line)
        # TODO: This would be a good place to log the received line, mostly for
        # debug logging.
        lines.append(line)
    return b"".join(lines)


async def run_command(
    args: List[str],
    *,
    error_message: str,
    timeout_message: str,
    timeout: Optional[float],
    stdout_callback: Optional[Callable] = None,
    stderr_callback: Optional[Callable] = None,
) -> Tuple[bytes, bytes]:
    """Run a command and get its output.

    Run a command using asyncio.subprocess, consume its standard streams, and
    return its output in bytes.

    :param args: the command and its arguments
    :param error_message: message of the RuntimeError raised on failure
    :param timeout_message: message of the TimeoutError raised on timeout
    :param timeout: seconds to wait for the command, or None to wait forever
    :param stdout_callback: optional callable invoked with each stdout line
    :param stderr_callback: optional callable invoked with each stderr line
    :returns: a ``(stdout, stderr)`` tuple of the raw command output
    :raises RuntimeError: if the process returns a non-zero exit status
    :raises TimeoutError: if the process times out
    """
    # Start the provided command, and return a handle. The command will run in
    # the background.
    proc = await asyncio.subprocess.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    assert proc.stdout is not None
    assert proc.stderr is not None

    # Create asynchronous tasks that will consume the standard streams of the
    # command, and call callbacks if necessary.
    stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback))
    stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback))

    # Wait until the command has finished, for a specific timeout.
    try:
        ret = await asyncio.wait_for(proc.wait(), timeout=timeout)
    except asyncio.exceptions.TimeoutError:
        # Don't leave a runaway child process or orphaned reader tasks behind.
        stdout_task.cancel()
        stderr_task.cancel()
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited on its own in the meantime.
            pass
        raise TimeoutError(timeout_message)

    # Drain both streams *before* checking the exit status, so that the
    # callbacks have seen the complete output (typically the error details)
    # by the time a failure is reported, and no task is left pending.
    stdout = await stdout_task
    stderr = await stderr_task

    if ret != 0:
        raise RuntimeError(error_message)
    return (stdout, stderr)


class DangerzoneConverter:
def __init__(self, progress_callback: Optional[Callable] = None) -> None:
self.percentage: float = 0.0
self.progress_callback = progress_callback
self.captured_output: bytes = b""

async def read_stream(
self, sr: asyncio.StreamReader, callback: Optional[Callable] = None
) -> bytes:
"""Consume a byte stream line-by-line.

Read all lines in a stream until EOF. If a user has passed a callback, call it for
each line.

Note that the lines are in bytes, since we can't assume that all command output will
be UTF-8 encoded. Higher level commands are advised to decode the output to Unicode,
if they know its encoding.
"""
buf = b""
while True:
line = await sr.readline()
if sr.at_eof():
break
self.captured_output += line
if callback is not None:
callback(line)
buf += line
return buf

async def run_command(
self,
args: List[str],
*,
error_message: str,
timeout_message: str,
timeout: Optional[float],
stdout_callback: Optional[Callable] = None,
stderr_callback: Optional[Callable] = None,
) -> Tuple[bytes, bytes]:
"""Run a command and get its output.

Run a command using asyncio.subprocess, consume its standard streams, and return its
output in bytes.

:raises RuntimeError: if the process returns a non-zero exit status
:raises TimeoutError: if the process times out
"""
# Start the provided command, and return a handle. The command will run in the
# background.
proc = await asyncio.subprocess.create_subprocess_exec(
*args,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)

# Log command to debug log so we can trace back which errors
# are from each command
self.captured_output += f"[COMMAND] {' '.join(args)}\n".encode()

assert proc.stdout is not None
assert proc.stderr is not None

# Create asynchronous tasks that will consume the standard streams of the command,
# and call callbacks if necessary.
stdout_task = asyncio.create_task(
self.read_stream(proc.stdout, stdout_callback)
)
stderr_task = asyncio.create_task(
self.read_stream(proc.stderr, stderr_callback)
)

# Wait until the command has finished, for a specific timeout. Then, verify that the
# command has completed successfully. In any other case, raise an exception.
try:
ret = await asyncio.wait_for(proc.wait(), timeout=timeout)
except asyncio.exceptions.TimeoutError:
raise TimeoutError(timeout_message)
if ret != 0:
raise RuntimeError(error_message)

# Wait until the tasks that consume the command's standard streams have exited as
# well, and return their output.
stdout = await stdout_task
stderr = await stderr_task
return (stdout, stderr)

def calculate_timeout(
self, size: float, pages: Optional[float] = None
Expand Down
23 changes: 14 additions & 9 deletions dangerzone/conversion/doc_to_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import magic

from .common import DangerzoneConverter, run_command, running_on_qubes
from .common import DangerzoneConverter, running_on_qubes


class DocumentToPixels(DangerzoneConverter):
Expand Down Expand Up @@ -189,7 +189,7 @@ async def convert(self) -> None:
"/tmp",
"/tmp/input_file",
]
await run_command(
await self.run_command(
args,
error_message="Conversion to PDF with LibreOffice failed",
timeout_message=(
Expand All @@ -213,7 +213,7 @@ async def convert(self) -> None:
"/tmp/input_file",
"/tmp/input_file.pdf",
]
await run_command(
await self.run_command(
args,
error_message="Conversion to PDF with GraphicsMagick failed",
timeout_message=(
Expand All @@ -231,7 +231,7 @@ async def convert(self) -> None:

# Obtain number of pages
self.update_progress("Calculating number of pages")
stdout, _ = await run_command(
stdout, _ = await self.run_command(
["pdfinfo", pdf_filename],
error_message="PDF file is corrupted",
timeout_message=(
Expand Down Expand Up @@ -317,7 +317,7 @@ def pdftoppm_progress_callback(line: bytes) -> None:

page_base = "/tmp/page"

await run_command(
await self.run_command(
[
"pdftoppm",
pdf_filename,
Expand Down Expand Up @@ -351,7 +351,7 @@ async def install_libreoffice_ext(self, libreoffice_ext: str) -> None:
f"/usr/lib/libreoffice/share/extensions/{libreoffice_ext}/",
f"/libreoffice_ext/{libreoffice_ext}",
]
await run_command(
await self.run_command(
unzip_args,
error_message="LibreOffice extension installation failed (unzipping)",
timeout_message="unzipping LibreOffice extension timed out 5 seconds",
Expand All @@ -377,11 +377,16 @@ async def main() -> int:

try:
await converter.convert()
error_code = 0 # Success!
except (RuntimeError, TimeoutError, ValueError) as e:
converter.update_progress(str(e), error=True)
return 1
else:
return 0 # Success!
error_code = 1

if not running_on_qubes():
# Write debug information (containers version)
with open("/tmp/dangerzone/captured_output.txt", "wb") as container_log:
container_log.write(converter.captured_output)
return error_code


if __name__ == "__main__":
Expand Down
3 changes: 3 additions & 0 deletions dangerzone/conversion/doc_to_pixels_qubes_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ async def main() -> None:
rgb_data = rgb_file.read()
await write_bytes(rgb_data)

# Write debug information
await write_bytes(converter.captured_output, file=sys.stderr)


if __name__ == "__main__":
sys.exit(asyncio.run(main()))
Loading