diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3acd9376..739dee19 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
     - uses: actions/setup-python@v2
     - uses: pre-commit/action@v2.0.0
       with:
-        extra_args: --hook-stage manual
+        extra_args: --hook-stage manual --all-files  # --all-files: run hooks on every file in the repo
 
   checks:
     name: Check Python ${{ matrix.python-version }} on ${{ matrix.runs-on }}
diff --git a/Makefile b/Makefile
index b55fac39..c95ea9e0 100644
--- a/Makefile
+++ b/Makefile
@@ -9,8 +9,9 @@ endif
 OSXFLAG=$(shell uname|grep -q Darwin && echo "-undefined dynamic_lookup")
 CFLAGS=--std=c++17 -O3 -Wall -fPIC -Irapidjson/include -Ipybind11/include -Icpp-peglib $(PYINC) -Iinclude
 LDFLAGS=-pthread
+PREFIX ?= /usr
 
-.PHONY: build all clean
+.PHONY: build all clean install
 
 all: demo examples
 
@@ -29,6 +30,12 @@ correctionlib: build/python.o build/correction.o build/formula_ast.o
 	$(CXX) $(LDFLAGS) -fPIC -shared $(OSXFLAG) $^ -o correctionlib/_core$(PYEXT)
 	touch correctionlib/__init__.py
 
+# Install the header and the Python extension below $(DESTDIR)$(PREFIX); DESTDIR is an optional staging root
+install: correctionlib
+	mkdir -p "$(DESTDIR)$(PREFIX)/include" "$(DESTDIR)$(PREFIX)/lib"
+	install -m 644 include/correction.h "$(DESTDIR)$(PREFIX)/include"
+	install -m 755 correctionlib/_core$(PYEXT) "$(DESTDIR)$(PREFIX)/lib"
+
 clean:
 	rm -rf build
 	rm -f demo
diff --git a/include/correction.h b/include/correction.h
index 836d1a72..291ff8ff 100644
--- a/include/correction.h
+++ b/include/correction.h
@@ -6,7 +6,15 @@
 #include <variant>
 #include <map>
 #include <memory>
-#include <rapidjson/document.h>
+
+// Forward declarations only, so that this header does not need <rapidjson/document.h>
+namespace rapidjson {
+  template<typename CharType> struct UTF8;
+  class CrtAllocator;
+  template <typename BaseAllocator> class MemoryPoolAllocator;
+  template <typename Encoding, typename Allocator> class GenericValue;
+  typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator>> Value;
+}
 
 
 namespace correction {
diff --git a/src/correction.cc b/src/correction.cc
index ac432b6f..f1be59d1 100644
--- a/src/correction.cc
+++ b/src/correction.cc
@@ -1,3 +1,4 @@
+#include <rapidjson/document.h>
 #include <rapidjson/filereadstream.h>
 #include <rapidjson/error/en.h>
 #include <optional>
@@ -339,7 +340,7 @@ const Content& Category::child(const std::vector<Variable::Type>& values) const
   if ( auto pval = std::get_if<std::string>(&values[variableIdx_]) ) {
     try {
       return std::get<StrMap>(map_).at(*pval);
-    } catch (std::out_of_range ex) {
+    } catch (std::out_of_range& ex) {
       if ( default_ ) {
         return *default_;
       }
@@ -351,7 +352,7 @@ const Content& Category::child(const std::vector<Variable::Type>& values) const
   else if ( auto pval = std::get_if<int>(&values[variableIdx_]) ) {
     try {
       return std::get<IntMap>(map_).at(*pval);
-    } catch (std::out_of_range ex) {
+    } catch (std::out_of_range& ex) {
       if ( default_ ) {
         return *default_;
       }
diff --git a/src/correctionlib/JSONEncoder.py b/src/correctionlib/JSONEncoder.py
new file mode 100755
index 00000000..c9d86740
--- /dev/null
+++ b/src/correctionlib/JSONEncoder.py
@@ -0,0 +1,163 @@
+"""A custom JSON encoder for corrections
+Author: Izaak Neutelings (March 2021)
+Description: Write JSON with indents more compactly by collapsing some lists and dictionaries
+Instructions: Write or print JSON dictionary 'data' as
+  from correctionlib import JSONEncoder
+  JSONEncoder.write(data,"corr.json",sort_keys=True,indent=2,maxlistlen=25,maxdictlen=3,breakbrackets=False)
+  print(JSONEncoder.dumps(data,sort_keys=True,indent=2,maxlistlen=25,maxdictlen=3,breakbrackets=False))
+Adapted from:
+  https://stackoverflow.com/questions/16264515/json-dumps-custom-formatting
+"""
+import json
+import math
+from typing import Any, List, Type
+
+import pydantic
+
+
+def write(data: Any, fname: str, **kwargs: Any) -> None:
+    """Serialise data via dumps(data, **kwargs) and store the text in file fname."""
+    with open(fname, "w") as stream:
+        stream.write(dumps(data, **kwargs))
+
+
+def dumps(data: Any, sort_keys: bool = False, **kwargs: Any) -> str:
+    """Return data as a JSONEncoder-formatted string (sort_keys: plain data only)."""
+    if not isinstance(data, pydantic.BaseModel):  # standard data structures
+        return json.dumps(data, cls=JSONEncoder, sort_keys=sort_keys, **kwargs)
+    # pydantic model: use its own serialiser, leaving out fields that were never set
+    return data.json(cls=JSONEncoder, exclude_unset=True, **kwargs)
+
+
+class JSONEncoder(json.JSONEncoder):
+    """
+    Encoder to make correctionlib JSON more compact, but still readable:
+    - keep list of primitives (int, float, str) on one line,
+      or split over several if the length is longer than a given maxlen
+    - do not break line for short dictionary if all values are primitive
+    - do not break line after bracket for first key of dictionary,
+      unless itself nested in dictionary
+
+    Besides the json.JSONEncoder arguments it accepts the keywords
+    maxlistlen, maxdictlen, maxstrlen and breakbrackets (see __init__).
+    Dictionary keys are written in sorted order when sort_keys is true.
+    """
+
+    def __init__(self, *args: Any, **kwargs: Any):
+        if kwargs.get("indent", None) is None:
+            kwargs["indent"] = 2
+        # maximum of primitive elements per list, before breaking lines
+        self.maxlistlen = kwargs.pop("maxlistlen", 25)
+        # maximum of primitive elements per dict, before breaking lines
+        self.maxdictlen = kwargs.pop("maxdictlen", 2)
+        # maximum length of strings in short dict, before breaking lines
+        self.maxstrlen = kwargs.pop("maxstrlen", 2 * self.maxlistlen)
+        # break after opening bracket
+        self.breakbrackets = kwargs.pop("breakbrackets", False)
+        super().__init__(*args, **kwargs)
+        self._indent = 0  # current indent
+        self.parent = type(None)  # type of parent for recursive use
+
+    @staticmethod
+    def _key(key: Any) -> str:
+        """Return a dict key as JSON text, coercing non-str keys like json.dumps."""
+        if isinstance(key, str):
+            return json.dumps(key)
+        (coerced,) = json.loads(json.dumps({key: None}))  # 1 -> "1", True -> "true"
+        return json.dumps(coerced)
+
+    def encode(self, obj: Any) -> str:
+        """Return obj as compact JSON text; recurses into lists, tuples and dicts."""
+        # remember the enclosing container type; it is restored before returning
+        grandparent = self.parent  # type: Type[Any]
+        self.parent = type(obj)
+        retval = ""
+        if isinstance(obj, (list, tuple)):  # lists, tuples
+            output = []
+            if all(isinstance(x, (int, float, str)) for x in obj):  # primitives only
+                strlen = sum(len(s) for s in obj if isinstance(s, str))
+                indent_str = " " * (self._indent + self.indent)
+                # enough text in the strings: wrap on character count, not item count
+                if strlen > self.maxstrlen and any(
+                    len(s) > 3 for s in obj if isinstance(s, str)
+                ):
+                    obj = [json.dumps(s) for s in obj]  # stringify every item
+                    if any(len(s) > self.maxstrlen / 4 for s in obj):
+                        output = obj  # long strings: one item per line
+                    else:  # group strings into several lines
+                        line = []  # type: List[str]
+                        nchars = 0
+                        for item in obj:
+                            if len(line) == 0 or nchars + len(item) < self.maxstrlen:
+                                line.append(item)
+                                nchars += len(item)
+                            else:  # new line
+                                output.append(", ".join(line))
+                                line = [item]
+                                nchars = len(item)
+                        if line:
+                            output.append(", ".join(line))
+                elif len(obj) <= self.maxlistlen:  # write short list on one line
+                    retval = "[ " + ", ".join(json.dumps(item) for item in obj) + " ]"
+                else:  # break long list into lines of (nearly) equal length
+                    nlines = math.ceil(len(obj) / float(self.maxlistlen))
+                    # round up, so that no trailing element is left out
+                    maxlen = math.ceil(len(obj) / nlines)
+                    for i in range(0, len(obj), maxlen):
+                        chunk = obj[i : i + maxlen]
+                        output.append(", ".join(json.dumps(x) for x in chunk))
+                if not retval:
+                    lines = (",\n" + indent_str).join(output)  # lines between brackets
+                    if grandparent == dict or self.breakbrackets:
+                        head = "\n" + indent_str  # break first line after bracket
+                    else:
+                        head = " " * (self.indent - 1)  # do not break first line
+                    retval = "[" + head + lines + "\n" + " " * self._indent + "]"
+            else:  # list of lists, tuples, dictionaries
+                # nested containers: one recursively encoded item per line
+                self._indent += self.indent
+                indent_str = " " * self._indent
+                for item in obj:
+                    output.append(indent_str + self.encode(item))
+                self._indent -= self.indent
+                indent_str = " " * self._indent
+                retval = "[\n" + ",\n".join(output) + "\n" + indent_str + "]"
+        elif isinstance(obj, dict):  # dictionaries
+            output = []
+            # honour sort_keys the same way json.JSONEncoder does
+            items = sorted(obj.items()) if self.sort_keys else list(obj.items())
+            if (
+                len(obj) <= self.maxdictlen
+                and all(isinstance(v, (int, float, str)) for _, v in items)
+                # str(k): keys need not be strings, so len(k) could raise
+                and sum(len(str(k)) + len(v) for k, v in items if isinstance(v, str))
+                <= self.maxstrlen
+            ):  # write short dict on one line
+                retval = (
+                    "{ "
+                    + ", ".join(self._key(k) + ": " + self.encode(v) for k, v in items)
+                    + " }"
+                )
+            else:  # break long dict into multiple line
+                self._indent += self.indent
+                indent_str = " " * self._indent
+                first = (
+                    grandparent not in (type(None), dict) and not self.breakbrackets
+                )  # first key may share the brace line (when nested in a list/tuple)
+                for key, value in items:
+                    valstr = self.encode(value)
+                    if (
+                        first and "\n" not in valstr
+                    ):  # no break between opening brace and first key
+                        row = " " * (self.indent - 1) + self._key(key) + ": " + valstr
+                    else:  # break before key
+                        row = "\n" + indent_str + self._key(key) + ": " + valstr
+                    output.append(row)
+                    first = False
+                self._indent -= self.indent
+                indent_str = " " * self._indent
+                retval = "{" + ",".join(output) + "\n" + indent_str + "}"
+        else:  # use default formatting
+            retval = json.dumps(obj)
+        self.parent = grandparent
+        return retval
diff --git a/tests/test_jsonencoder.py b/tests/test_jsonencoder.py
new file mode 100755
index 00000000..aa41612e
--- /dev/null
+++ b/tests/test_jsonencoder.py
@@ -0,0 +1,247 @@
+from correctionlib.JSONEncoder import dumps
+
+
+def test_jsonencode():
+    """Check the exact compact layout and that no input element gets lost."""
+    import json
+    data = {
+        "layer1": {
+            "layer2_1": {
+                "layer3_1": [
+                    {"x": 1, "y": 7},
+                    {"x": 0, "y": 4},
+                    {"x": 5, "y": 3},
+                    {"x": 6, "y": 9},
+                    {"key": "foo", "value": 1},
+                    {"key": "foo", "value": {k: v for v, k in enumerate("abcd")}},
+                    {k: v for v, k in enumerate("ab")},
+                    {k: v for v, k in enumerate("abc")},
+                    {k: v for v, k in enumerate("abcd")},
+                    {k: {k2: v2 for v2, k2 in enumerate("ab")} for k in "ab"},
+                ],
+                "layer3_2": "string",
+                "layer3_3": [
+                    {"x": 2, "y": 8, "z": 3},
+                    {"x": 1, "y": 5, "z": 4},
+                    {"x": 6, "y": 9, "z": 8},
+                ],
+            },
+            "layer2_2": {
+                "layer3_4": [
+                    ["a", "b", "c"],
+                    [c for c in "abcdefghijklmnopqrstuvwxyz"],
+                    [c for c in "abcdefghijklmnopqrstuvwxyz123"],
+                    [c for c in "abcdefghijklmnopqrstuvwxyz" * 2],
+                    ["this is short", "very short"],
+                    ["this is medium long", "verily, can you see?"],
+                    ["this one is a bit longer,", "in order to find the edge..."],
+                    [
+                        "this",
+                        "list of",
+                        "strings",
+                        "is a bit",
+                        "longer",
+                        "in order",
+                        "to find",
+                        "the edge",
+                        "but the",
+                        "words",
+                        "are short",
+                    ],
+                    [
+                        "this",
+                        1,
+                        2,
+                        "list of",
+                        45,
+                        "also",
+                        66,
+                        "contains",
+                        "some",
+                        "numbers",
+                        "for the",
+                        100,
+                        "heck of",
+                        "it",
+                        "see if",
+                        "it splits",
+                    ],
+                    [
+                        "this",
+                        "list of strings is",
+                        "a bit longer,",
+                        "in order",
+                        "to find the edge...",
+                    ],
+                    [
+                        "this is a very, very long string to test line break",
+                        "and this is another very long string",
+                    ],
+                ],
+                "layer3_5": [
+                    list(range(1, 10 + 1)),
+                    list(range(1, 20 + 1)),
+                    list(range(1, 24 + 1)),
+                    list(range(1, 25 + 1)),
+                    list(range(1, 26 + 1)),
+                    list(range(1, 27 + 1)),
+                    list(range(1, 30 + 1)),
+                    list(range(1, 40 + 1)),
+                    list(range(1, 50 + 1)),
+                    list(range(1, 51 + 1)),
+                    list(range(1, 52 + 1)),
+                ],
+                "layer3_6": list(range(1, 20 + 1)),
+                "layer3_7": list(range(1, 40 + 1)),
+                "layer3_8": [
+                    {
+                        "key": "this is short",
+                        "value": "very short",
+                    },
+                    {
+                        "key": "this is medium long",
+                        "value": "verily, can you see?",
+                    },
+                    {
+                        "key": "this is one is a bit longer",
+                        "value": "to find the edge",
+                    },
+                    {
+                        "key": "this is a very long string to test line break",
+                        "value": "another very long string",
+                    },
+                ],
+            },
+        }
+    }
+
+    formatted = dumps(
+        data,
+        sort_keys=True,
+        indent=2,
+        maxlistlen=25,
+        maxdictlen=3,
+        breakbrackets=False,
+    )
+
+    expected = """\
+{
+  "layer1": {
+    "layer2_1": {
+      "layer3_1": [
+        { "x": 1, "y": 7 },
+        { "x": 0, "y": 4 },
+        { "x": 5, "y": 3 },
+        { "x": 6, "y": 9 },
+        { "key": "foo", "value": 1 },
+        { "key": "foo",
+          "value": {
+            "a": 0,
+            "b": 1,
+            "c": 2,
+            "d": 3
+          }
+        },
+        { "a": 0, "b": 1 },
+        { "a": 0, "b": 1, "c": 2 },
+        { "a": 0,
+          "b": 1,
+          "c": 2,
+          "d": 3
+        },
+        { "a": { "a": 0, "b": 1 },
+          "b": { "a": 0, "b": 1 }
+        }
+      ],
+      "layer3_2": "string",
+      "layer3_3": [
+        { "x": 2, "y": 8, "z": 3 },
+        { "x": 1, "y": 5, "z": 4 },
+        { "x": 6, "y": 9, "z": 8 }
+      ]
+    },
+    "layer2_2": {
+      "layer3_4": [
+        [ "a", "b", "c" ],
+        [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
+          "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
+        ],
+        [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+          "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "1", "2", "3"
+        ],
+        [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+          "s", "t", "u", "v", "w", "x", "y", "z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+          "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
+        ],
+        [ "this is short", "very short" ],
+        [ "this is medium long", "verily, can you see?" ],
+        [ "this one is a bit longer,",
+          "in order to find the edge..."
+        ],
+        [ "this", "list of", "strings", "is a bit", "longer",
+          "in order", "to find", "the edge", "but the", "words",
+          "are short"
+        ],
+        [ "this", 1, 2, "list of", 45, "also", 66, "contains", "some",
+          "numbers", "for the", 100, "heck of", "it", "see if",
+          "it splits"
+        ],
+        [ "this",
+          "list of strings is",
+          "a bit longer,",
+          "in order",
+          "to find the edge..."
+        ],
+        [ "this is a very, very long string to test line break",
+          "and this is another very long string"
+        ]
+      ],
+      "layer3_5": [
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+          14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+          26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+          18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+          35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
+        ],
+        [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+          19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+          37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52
+        ]
+      ],
+      "layer3_6": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ],
+      "layer3_7": [
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
+      ],
+      "layer3_8": [
+        { "key": "this is short", "value": "very short" },
+        { "key": "this is medium long", "value": "verily, can you see?" },
+        { "key": "this is one is a bit longer",
+          "value": "to find the edge"
+        },
+        { "key": "this is a very long string to test line break",
+          "value": "another very long string"
+        }
+      ]
+    }
+  }
+}"""
+    assert formatted == expected, f"Found:\n {formatted}"
+    assert json.loads(formatted) == data  # the output must parse back to the input