import copy
import re
from typing import List

# ``.`` never matches a newline, so this consumes everything on a line up to
# (and including) the last carriage return that is followed by more text on
# that same line. A ``\r`` that is part of ``\r\n`` or ends the text is kept.
RGX_CARRIAGERETURN = re.compile(r".*\r(?=[^\n])")
# ``\x08`` is the literal backspace character. In a raw regex string ``\b``
# would instead be the zero-width word-boundary assertion, which deletes
# ordinary characters at the end of every word.
RGX_BACKSPACE = re.compile(r"[^\n]\x08")


def _as_text(text) -> str:
    """Return stream text as a single string (joins a list of lines)."""
    if isinstance(text, (list, tuple)):
        return "".join(text)
    return text


def _resolve_control_chars(text: str) -> str:
    """Apply backspace and carriage-return characters found in ``text``."""
    # Each pass cancels one "character + backspace" pair per run of
    # backspaces, so repeat until a pass no longer changes anything.
    previous = None
    while previous != text:
        previous = text
        text = RGX_BACKSPACE.sub("", text)
    # A single pass is enough here: the greedy match already extends to the
    # last qualifying carriage return on each line.
    return RGX_CARRIAGERETURN.sub("", text)


def _is_stream(output, name: str) -> bool:
    """Return True if ``output`` is a stream output with the given name."""
    return output["output_type"] == "stream" and output["name"] == name


def coalesce_streams(outputs: List["NotebookNode"]) -> List["NotebookNode"]:
    """Merge all stream outputs with shared names into single streams.

    This ensures deterministic outputs.

    Adapted from:
    https://github.com/computationalmodelling/nbval/blob/master/nbval/plugin.py.

    :param outputs: the outputs of a single code cell, in execution order.
        Neither the list nor the outputs it contains are modified.
    :returns: a new list in which every stream name occurs at most once, at
        the position of its first occurrence, holding the concatenated text
        of all streams with that name. Backspace and carriage-return
        characters in the merged text are resolved, and a ``stderr`` stream
        directly followed by a ``stdout`` stream is moved after it.
        Non-stream outputs are passed through as the same objects.
    """
    if not outputs:
        return []

    new_outputs = []
    streams = {}
    for output in outputs:
        if output["output_type"] != "stream":
            new_outputs.append(output)
            continue
        name = output["name"]
        text = _as_text(output["text"])
        if name in streams:
            streams[name]["text"] += text
        else:
            # Accumulate into a shallow copy so the caller's output is left
            # untouched and repeated calls do not duplicate text.
            merged = copy.copy(output)
            merged["text"] = text
            new_outputs.append(merged)
            streams[name] = merged

    # process \r and \b characters
    for merged in streams.values():
        merged["text"] = _resolve_control_chars(merged["text"])

    # We also want to ensure stdout and stderr are always in the same
    # consecutive order, because they are asynchronous, so order isn't
    # guaranteed.
    for index in range(len(new_outputs) - 1):
        first, second = new_outputs[index], new_outputs[index + 1]
        if _is_stream(first, "stderr") and _is_stream(second, "stdout"):
            new_outputs[index], new_outputs[index + 1] = second, first

    return new_outputs
sys\n", + "print('stdout1', file=sys.stdout)\n", + "print('stdout2', file=sys.stdout)\n", + "print('stderr1', file=sys.stderr)\n", + "print('stderr2', file=sys.stderr)\n", + "print('stdout3', file=sys.stdout)\n", + "print('stderr3', file=sys.stderr)\n", + "1" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "stdout1\n", + "stdout2\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "stderr1\n", + "stderr2\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "stdout3\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "stderr3\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": {}, + "execution_count": 1 + } + ], + "metadata": {} + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_render_outputs.py b/tests/test_render_outputs.py index 67a37c3b..3ecda981 100644 --- a/tests/test_render_outputs.py +++ b/tests/test_render_outputs.py @@ -72,6 +72,17 @@ def test_stderr_remove(sphinx_run, file_regression): file_regression.check(doctree.pformat(), extension=".xml", encoding="utf8") +@pytest.mark.sphinx_params( + "merge_streams.ipynb", + conf={"jupyter_execute_notebooks": "off", "nb_merge_streams": True}, +) +def test_merge_streams(sphinx_run, file_regression): + sphinx_run.build() + assert sphinx_run.warnings() == "" + doctree = sphinx_run.get_resolved_doctree("merge_streams") + file_regression.check(doctree.pformat(), extension=".xml", encoding="utf8") + + @pytest.mark.sphinx_params( "metadata_image.ipynb", 
conf={"jupyter_execute_notebooks": "off", "nb_render_key": "myst"}, diff --git a/tests/test_render_outputs/test_merge_streams.xml b/tests/test_render_outputs/test_merge_streams.xml new file mode 100644 index 00000000..40c8c7dc --- /dev/null +++ b/tests/test_render_outputs/test_merge_streams.xml @@ -0,0 +1,23 @@ + + + + + import sys + print('stdout1', file=sys.stdout) + print('stdout2', file=sys.stdout) + print('stderr1', file=sys.stderr) + print('stderr2', file=sys.stderr) + print('stdout3', file=sys.stdout) + print('stderr3', file=sys.stderr) + 1 + + + stdout1 + stdout2 + stdout3 + + stderr1 + stderr2 + stderr3 + + 1 diff --git a/tox.ini b/tox.ini index fa70650b..32eca9fc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,9 +11,9 @@ # then then deleting compiled files has been found to fix it: `find . -name \*.pyc -delete` [tox] -envlist = py37-sphinx3 +envlist = py37-sphinx4 -[testenv:py{36,37,38,39}-sphinx{3,4}] +[testenv:py{37,38,39}-sphinx{3,4}] extras = testing deps = sphinx3: sphinx>=3,<4