From 5dea24f212615e69a4f9c45f15b8b0d532ea39b9 Mon Sep 17 00:00:00 2001
From: rjdbcm <rjdbcm@outlook.com>
Date: Mon, 9 Dec 2024 19:50:31 -0600
Subject: [PATCH] 1.8.0: add regexploit

Signed-off-by: rjdbcm <rjdbcm@outlook.com>
---
 README.rst               |  23 +++-
 meson.build              |   2 +-
 ozi_build/_at.py         |  58 ++++++++++
 ozi_build/_branch.py     |  81 ++++++++++++++
 ozi_build/_categories.py |  93 +++++++++++++++
 ozi_build/_char.py       | 236 +++++++++++++++++++++++++++++++++++++++
 ozi_build/_files.py      |  35 ++++++
 ozi_build/_groupref.py   |  28 +++++
 ozi_build/_ranges.py     |  33 ++++++
 ozi_build/_redos.py      | 225 +++++++++++++++++++++++++++++++++++++
 ozi_build/_repeat.py     |  71 ++++++++++++
 ozi_build/_sequence.py   | 115 +++++++++++++++++++
 ozi_build/_sre.py        | 205 ++++++++++++++++++++++++++++++++++
 ozi_build/_text.py       |  51 +++++++++
 ozi_build/_util.py       |  51 +++++++++
 ozi_build/config.py      |   2 +
 ozi_build/meson.build    |  15 ++-
 17 files changed, 1319 insertions(+), 5 deletions(-)
 create mode 100644 ozi_build/_at.py
 create mode 100644 ozi_build/_branch.py
 create mode 100644 ozi_build/_categories.py
 create mode 100644 ozi_build/_char.py
 create mode 100644 ozi_build/_files.py
 create mode 100644 ozi_build/_groupref.py
 create mode 100644 ozi_build/_ranges.py
 create mode 100644 ozi_build/_redos.py
 create mode 100644 ozi_build/_repeat.py
 create mode 100644 ozi_build/_sequence.py
 create mode 100644 ozi_build/_sre.py
 create mode 100644 ozi_build/_text.py

diff --git a/README.rst b/README.rst
index dc03237..f4ba7c1 100644
--- a/README.rst
+++ b/README.rst
@@ -4,21 +4,38 @@ OZI.build
 
 This is the `OZI-Project <https://github.com/OZI-Project>`_ maintained fork of the mesonpep517 0.2 tag.
 
-This is a simple module that implements pep517 for the meson build system.
+This is a module that implements PEP-517 for the meson build system.
 
 This means that you only need to provide a ``pyproject.toml`` in your project
 source root to be able to publish your project built with meson on PyPI
 and to create a wheel for the project.
 
+Other features include:
+
+* compiling modules to bytecode with pyc_wheel
+* scanning ``pyproject.toml`` for exploitable ReDoS patterns with regexploit
+
 For more information have a look at `the documentation <https://docs.oziproject.dev/en/stable/ozi_build.html>`_
 
-OZI.build is licensed under Apache-2.0 and includes ``pyc_wheel`` and
-portions of ``wheel`` whose copyright information is reproduced here.
+License
+-------
+
+OZI.build is licensed under Apache-2.0 and includes ``regexploit``,
+``pyc_wheel`` and portions of ``wheel`` whose copyright information is
+reproduced here.
+
+Apache-2.0 contributors
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``regexploit`` Copyright (c) 2021 Ben Caller <REMOVETHISPREFIX.ben@doyensec.com>
 
 ``pyc_wheel`` Copyright (c) 2016 Grant Patten <grant@gpatten.com>
 
 ``pyc_wheel`` Copyright (c) 2019-2021 Adam Karpierz <adam@karpierz.net>
 
+MIT contributors
+^^^^^^^^^^^^^^^^
+
 ``wheel`` Copyright (c) 2012-2014 Daniel Holth <dholth@fastmail.fm> and contributors.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/meson.build b/meson.build
index 1cc8b2f..92d7c9e 100644
--- a/meson.build
+++ b/meson.build
@@ -1,4 +1,4 @@
-project('OZI.build', version : '1.7.2', license : 'apache-2.0')
+project('OZI.build', version : '1.8.0', license : 'apache-2.0')
 fs = import('fs')
 python = import('python').find_installation()
 subdir('ozi_build')
diff --git a/ozi_build/_at.py b/ozi_build/_at.py
new file mode 100644
index 0000000..aa6326b
--- /dev/null
+++ b/ozi_build/_at.py
@@ -0,0 +1,58 @@
+from dataclasses import dataclass
+from typing import List, Optional
+
+from ._char import Character
+from ._repeat import InfiniteRepeat, Repeat
+
+
+@dataclass
+class EndOfString:
+    character: Optional[Character] = None  # recomputed by set_character()
+
+    @property
+    def starriness(self):
+        return 0
+
+    @property
+    def minimum_length(self):
+        return 1  # Meaningless really here
+
+    def overall_character_class(self):
+        return self.character
+
+    def __repr__(self) -> str:
+        return f"${self.character}"
+
+    def __and__(self, other: Character) -> Optional[Character]:
+        return other & self.character
+
+    def example(self):
+        return "\n"  # ish
+
+    def set_character(self, previous_elems: List):
+        """
+        To force backtracking, the dollar will have to not match any previous groups until a mandatory group.
+        This can perhaps be made more lenient.
+
+        To cause backtracking on a long string of a's:
+        a*a*a*$ -> Any [^a]
+        [ab]+a*a*a*$ -> Any [^ab] (baaaaaaaaaaaab does not backtrack)
+        b+a*a*a*$ -> Any [^a]
+        .a*a*a*$ -> Any [^a]
+        .+a*a*a*$ -> Cannot backtrack because everything gets matched by .+ :(
+        """
+        self.character = None  # start empty; widened by the |= below
+        for elem in reversed(previous_elems):  # nearest preceding element first
+            if elem.minimum_length > 0 and not isinstance(elem, InfiniteRepeat):
+                return  # xa*[ab]*a*$ -> [ab]
+            c = (
+                elem.maximal_character_class()
+                if isinstance(elem, Repeat)
+                else elem.overall_character_class()
+            )
+            if c:
+                if elem.minimum_length > 0 and (self.character & c) != self.character:
+                    # c is smaller than self.character (i.e. c is not an ANY)
+                    # x+a*[ab]*a*$ -> [ab]
+                    return
+                self.character |= c  # union with the classes collected so far
diff --git a/ozi_build/_branch.py b/ozi_build/_branch.py
new file mode 100644
index 0000000..050a182
--- /dev/null
+++ b/ozi_build/_branch.py
@@ -0,0 +1,81 @@
+from dataclasses import dataclass
+from typing import Iterator, List, Optional
+
+from ._at import EndOfString
+from ._char import Character
+from ._repeat import FiniteRepeat, InfiniteRepeat
+from ._sequence import Sequence
+
+
+@dataclass(frozen=True)
+class Branch:
+    branches: List  # the alternatives
+    optional: bool = False  # True when the group may also match nothing
+
+    def get_branches(self) -> Iterator:
+        yield from self.branches
+        if self.optional:
+            yield None
+
+    @property
+    def starriness(self) -> int:
+        return max(alt.starriness for alt in self.branches)
+
+    @property
+    def minimum_length(self) -> int:
+        if self.optional:
+            return 0
+        return min(alt.minimum_length for alt in self.branches)
+
+    def overall_character_class(self) -> Optional[Character]:
+        # Intersection of all alternatives; None as soon as it becomes empty.
+        common = Character.ANY()
+        for alt in self.branches:
+            common &= alt.overall_character_class()
+            if common is None:
+                break
+        return common
+
+    def maximal_character_class(self):
+        return None  # Really?
+
+    def example(self) -> str:
+        return "" if self.optional else self.branches[0].example()
+
+    def __len__(self) -> int:
+        return len(self.branches) + int(self.optional)
+
+    def __repr__(self) -> str:
+        alternatives = " | ".join(map(str, self.branches))
+        suffix = "?" if self.optional else ""
+        return f"BR( {alternatives} ){suffix}"
+
+    def matching_repeats(self):
+        for alt in (b for b in self.branches if b.starriness > 0):
+            if isinstance(alt, InfiniteRepeat):
+                yield alt
+            elif isinstance(alt, Sequence):
+                yield from alt.matching_repeats()
+
+
+def make_branch(branches: List):
+    """Collapse a list of parsed alternatives into the simplest element.
+
+    Returns the lone alternative, ``None`` when nothing non-empty remains,
+    a merged character class (optional if an alternative was dropped) when
+    every alternative is a single ``Character``, or otherwise a ``Branch``.
+    """
+    if len(branches) == 1:
+        return branches[0]
+    kept = [b for b in branches if b and not isinstance(b, EndOfString)]
+    if not kept:
+        return None
+    # Falsy or end-of-string alternatives were dropped: (ab|cd|) -> (ab|cd)?
+    optional = len(kept) < len(branches)
+    if not all(isinstance(b, Character) for b in kept):
+        return Branch(kept, optional)
+    # (a|b) -> [ab], (a|b|) -> [ab]?
+    merged = None
+    for char in kept:
+        merged |= char
+    return FiniteRepeat(merged, 0, 1) if optional else merged
diff --git a/ozi_build/_categories.py b/ozi_build/_categories.py
new file mode 100644
index 0000000..4cc10ae
--- /dev/null
+++ b/ozi_build/_categories.py
@@ -0,0 +1,93 @@
+import sys
+import unicodedata
+from enum import Enum, auto
+from typing import Set
+
+
+class Category(Enum):
+    DIGIT = auto()
+    NOT_DIGIT = auto()
+    WORD = auto()
+    NOT_WORD = auto()
+    SPACE = auto()
+    NOT_SPACE = auto()
+
+    @property
+    def is_positive(self) -> bool:
+        return not self.name.startswith("NOT_")
+
+    def negate(self) -> "Category":
+        # Members come in X / NOT_X pairs, so the partner is found by name.
+        if self.is_positive:
+            return Category["NOT_" + self.name]
+        return Category[self.name[len("NOT_") :]]
+
+    def example(self) -> str:
+        return EXAMPLE_FOR_CAT[self]
+
+    def contains(self, literal: int) -> bool:
+        """Tell whether the code point ``literal`` falls in this category.
+
+        Membership of the positive category is computed from the Unicode
+        general category; a ``NOT_`` member returns the opposite answer.
+        """
+        c = chr(literal)
+        unicat = unicodedata.category(c)
+        if self in (Category.DIGIT, Category.NOT_DIGIT):
+            member = unicat == "Nd"
+        elif self in (Category.WORD, Category.NOT_WORD):
+            # underscore is a word character
+            member = unicat[0] == "L" or unicat == "Nd" or literal == 0x5F
+        else:
+            # SPACE / NOT_SPACE: Unicode "Zs" separators plus ASCII whitespace
+            member = unicat == "Zs" or c in (" ", "\n", "\t", "\r", "\f", "\v")
+        return member == self.is_positive
+
+
+CATS = {}
+
+
+def list_category(category, full_unicode: bool = False):
+    """Yield the code points that belong to ``category``.
+
+    Membership is decided by ``Category.contains``.
+
+    :param category: the :class:`Category` member to enumerate.
+    :param full_unicode: scan every code point up to ``sys.maxunicode``
+        instead of only the first 256.
+
+    A non-empty ``CATS`` entry for ``category`` is replayed instead of
+    scanning. The cache is keyed by category alone, so a cached entry is
+    returned whatever ``full_unicode`` says.
+    """
+    if cached := CATS.get(category):
+        yield from cached
+        # Without this return the scan below ran as well, so every cached
+        # code point in the scanned range was yielded a second time.
+        return
+    limit = (sys.maxunicode + 1) if full_unicode else 256
+    for data in range(limit):
+        # Category.contains applies the same unicodedata-based membership
+        # rules that used to be repeated here branch by branch.
+        if category.contains(data):
+            yield data
+
+
+def covers_any(categories: Set[Category]) -> bool:
+    # A category together with its own negation matches every character.
+    return any(
+        cat.is_positive and cat.negate() in categories for cat in categories
+    )
+
+
+# CATS[sre_parse.CATEGORY_DIGIT] = list(list_category(sre_parse.CATEGORY_DIGIT))
+# CATS[sre_parse.CATEGORY_SPACE] = list(list_category(sre_parse.CATEGORY_SPACE))
+# CATS[sre_parse.CATEGORY_WORD] = list(list_category(sre_parse.CATEGORY_WORD))
+EXAMPLE_FOR_CAT = {
+    Category.DIGIT: "4",
+    Category.NOT_DIGIT: "!",
+    Category.WORD: "w",
+    Category.NOT_WORD: "$",
+    Category.SPACE: " ",
+    Category.NOT_SPACE: ".",
+}
diff --git a/ozi_build/_char.py b/ozi_build/_char.py
new file mode 100644
index 0000000..865665a
--- /dev/null
+++ b/ozi_build/_char.py
@@ -0,0 +1,236 @@
+import string
+from dataclasses import dataclass
+from typing import Optional, Set
+
+from ._categories import Category, covers_any, list_category
+from ._ranges import Range, lits_to_ranges
+
+
+@dataclass(frozen=True)
+class Character:
+    literals: Optional[Set[int]] = None
+    categories: Optional[Set[Category]] = None
+    positive: bool = True
+
+    @staticmethod
+    def ANY() -> "Character":
+        return Character()
+
+    @staticmethod
+    def LITERAL(literal: int) -> "Character":
+        return Character({literal})
+
+    @property
+    def minimum_length(self) -> int:
+        return 1
+
+    @property
+    def starriness(self) -> int:
+        return 0
+
+    def __hash__(self) -> int:
+        """Hash consistently with the dataclass-generated ``__eq__``."""
+        # Category members define no ordering, so sorted() raised TypeError
+        # for two or more categories. frozenset hashes ignore element order,
+        # which makes sorting unnecessary for both fields.
+        lits = frozenset(self.literals) if self.literals else None
+        cats = frozenset(self.categories) if self.categories else None
+        return hash((self.positive, lits, cats))
+
+    def exact_character_class(self) -> "Character":
+        return self
+
+    def overall_character_class(self) -> "Character":
+        return self
+
+    def maximal_character_class(self) -> "Character":
+        return self
+
+    @property
+    def is_any(self) -> bool:
+        return self.literals is None and self.categories is None and self.positive
+
+    @property
+    def _is_positive_literal(self) -> bool:
+        return self.positive and self.literals is not None and self.categories is None
+
+    @property
+    def _is_negative_literal(self) -> bool:
+        return (
+            not self.positive and self.literals is not None and self.categories is None
+        )
+
+    @property
+    def _is_positive_category(self) -> bool:
+        return self.positive and self.literals is None and self.categories is not None
+
+    @property
+    def _is_negative_category(self) -> bool:
+        return (
+            not self.positive and self.literals is None and self.categories is not None
+        )
+
+    def expand_categories(self) -> "Character":
+        """
+        This is the nuclear option where we expand the categories into literals.
+        Can be huge in unicode.
+        """
+        if self.categories:
+            lits: Set[int] = set(self.literals) if self.literals else set()
+            for c in self.categories:
+                lits.update(list_category(c))
+            return Character(literals=lits, positive=self.positive)
+
+        return self
+
+    def __and__(self, other: "Optional[Character]") -> "Optional[Character]":
+        if other is None:  # None stands for "matches nothing"
+            return None
+        if self.is_any:
+            return other
+        if other.is_any:
+            return self
+
+        # [ab] & [bc] -> [b]
+        if self._is_positive_literal and other._is_positive_literal:
+            lits = self.literals & other.literals
+            if not lits:
+                return None
+            return Character(literals=lits)
+        if self._is_positive_category and other._is_positive_category:
+            cats = self.categories & other.categories
+            if not cats:
+                return None
+            return Character(categories=cats)
+        # [^ab] & [^bc] -> [^abc]
+        if self._is_negative_literal and other._is_negative_literal:
+            return Character(literals=self.literals | other.literals, positive=False)
+        if self._is_negative_category and other._is_negative_category:
+            categories = self.categories | other.categories
+            if covers_any(categories):  # [^\d] & [^\D] = nothing
+                return None
+            return Character(categories=categories, positive=False)
+        # [ab] & [^bc] -> [a]
+        if self._is_positive_literal and other._is_negative_literal:
+            lits = self.literals - other.literals
+            if not lits:
+                return None
+            return Character(literals=lits)
+        if other._is_positive_literal and self._is_negative_literal:
+            lits = other.literals - self.literals
+            if not lits:
+                return None
+            return Character(literals=lits)
+
+        # TODO: be less lazy and sort out the general case without expanding everything if possible
+        return self.expand_categories() & other.expand_categories()
+
+    def __rand__(self, other: "Optional[Character]") -> "Optional[Character]":
+        return self & other
+
+    def __or__(self, other: "Optional[Character]") -> "Optional[Character]":
+        if other is None:
+            return self
+        if self.is_any or other.is_any:
+            return Character.ANY()
+        if self == other:
+            return self
+        if nor := (self.negate() & other.negate()):  # Slow, but logical
+            return nor.negate()
+        else:
+            return Character.ANY()
+
+    def __ror__(self, other: "Optional[Character]") -> "Optional[Character]":
+        return self | other
+
+    def __repr__(self) -> str:
+        if self.is_any:
+            return "."
+        result = "["
+        if not self.positive:
+            result += "^"
+        more = False
+        if self.literals is not None:
+            lits, ranges = lits_to_ranges(self.literals)
+            result += ",".join(literal_repr(o) for o in lits)
+            if lits and ranges:
+                result += ","
+            result += ",".join(range_repr(r) for r in ranges)
+            more = True
+        if self.categories is not None:
+            if more:
+                result += ";"
+            result += ",".join(c.name for c in self.categories)
+            more = True
+        return result + "]"
+
+    def example(self) -> str:
+        for c in nice_characters():
+            if self.matches(c):
+                return chr(c)
+
+        if self.positive:
+            if self.literals:
+                if len(self.literals) > 1:
+                    # Try to avoid \n due to false positives with the . character and flags
+                    return chr(next(o for o in self.literals if o != 0xA))
+                return chr(next(iter(self.literals)))
+            elif self.categories:
+                return sorted(self.categories, key=lambda c: 0 if c.is_positive else 1)[
+                    0
+                ].example()
+
+        raise NotImplementedError(self)
+
+    def negate(self) -> "Optional[Character]":
+        if self.is_any:
+            return None
+        return Character(
+            literals=self.literals,
+            categories=self.categories,
+            positive=not self.positive,
+        )
+
+    def contains(self, subgroup: "Character") -> bool:
+        if self.is_any:
+            return True
+        if subgroup.is_any:
+            return False
+        if subgroup == self:
+            return True
+
+        if self._is_positive_literal and subgroup._is_positive_literal:
+            return not (subgroup.literals - self.literals)
+        if self._is_positive_category and subgroup._is_positive_category:
+            return not (subgroup.categories - self.categories)
+
+        raise NotImplementedError  # Lazy, TODO: do full match
+
+    def matches(self, literal: int) -> bool:
+        if self.is_any:
+            return True
+        if self.literals is not None and literal in self.literals:
+            return self.positive
+        if self.categories:
+            for cat in self.categories:
+                if cat.contains(literal):
+                    return self.positive
+        return not self.positive
+
+
+def nice_characters():
+    # Printable ASCII minus its last five entries (tab, newline, CR, VT, FF).
+    yield from map(ord, string.printable[:-5])
+
+
+def literal_repr(literal: int) -> str:
+    # Alphanumerics print as themselves, everything else as hex (plus glyph).
+    char, hex_code = chr(literal), f"{literal:02x}"
+    if char in string.ascii_letters or char in string.digits:
+        return char
+    suffix = f":{char}" if char in string.punctuation else ""
+    return hex_code + suffix
+
+
+def range_repr(r: Range) -> str:
+    return f"[{literal_repr(r.min_val)}-{literal_repr(r.max_val)}]"
diff --git a/ozi_build/_files.py b/ozi_build/_files.py
new file mode 100644
index 0000000..d350721
--- /dev/null
+++ b/ozi_build/_files.py
@@ -0,0 +1,35 @@
+import os
+import os.path
+from glob import iglob
+from typing import List, Optional
+
+
+def _file_generator(
+    files_argument: List[str], is_glob: bool, filename_globs: List[str]
+):
+    """Expand the arguments into paths: globs, directory walks or plain files."""
+    for entry in files_argument:
+        if is_glob:
+            yield from iglob(entry, recursive=True)
+        elif not os.path.isdir(entry):
+            # Plain paths are passed through without checking that they exist.
+            yield entry
+        else:
+            for pattern in filename_globs:
+                yield from iglob(os.path.join(entry, "**", pattern), recursive=True)
+
+
+def file_generator(
+    files_argument: List[str],
+    is_glob: bool,
+    filename_globs: List[str],
+    ignore: Optional[List[str]] = None,
+):
+    """Yield the paths produced by ``_file_generator``, minus ignored ones.
+
+    A path is ignored when any string in ``ignore`` occurs inside it.
+    """
+    excluded = ignore or ()
+    for path in _file_generator(files_argument, is_glob, filename_globs):
+        if not any(fragment in path for fragment in excluded):
+            yield path
diff --git a/ozi_build/_groupref.py b/ozi_build/_groupref.py
new file mode 100644
index 0000000..68a4712
--- /dev/null
+++ b/ozi_build/_groupref.py
@@ -0,0 +1,28 @@
+from ._repeat import FiniteRepeat, InfiniteRepeat
+from ._branch import Branch
+from ._sequence import Sequence
+
+
+def subpattern_to_groupref(subpattern):
+    """Return a copy of ``subpattern`` with its unbounded repeats capped.
+
+    ``None`` and star-free subpatterns come back unchanged. An
+    ``InfiniteRepeat`` becomes a ``FiniteRepeat`` running from its minimum
+    to its minimum plus one, and repeats, branches and sequences have
+    their children converted recursively.
+    """
+    if subpattern is None or subpattern.starriness == 0:
+        return subpattern
+    convert = subpattern_to_groupref
+    if isinstance(subpattern, FiniteRepeat):
+        return subpattern.alter_repeat(convert(subpattern.repeat))
+    if isinstance(subpattern, InfiniteRepeat):
+        low = subpattern.minimum_repeats
+        return FiniteRepeat(convert(subpattern.repeat), low, low + 1)
+    if isinstance(subpattern, Branch):
+        rebuilt = [convert(b) for b in subpattern.branches]
+        return Branch(rebuilt, subpattern.optional)
+    if isinstance(subpattern, Sequence):
+        return Sequence([convert(e) for e in subpattern.elements])
+    # Any other starry element type is passed through untouched.
+    return subpattern
diff --git a/ozi_build/_ranges.py b/ozi_build/_ranges.py
new file mode 100644
index 0000000..c62bd46
--- /dev/null
+++ b/ozi_build/_ranges.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+from typing import Iterator, List, Set, Tuple
+
+
+@dataclass(frozen=True)
+class Range:
+    min_val: int  # first code point of the run (inclusive)
+    max_val: int  # last code point of the run (inclusive)
+
+
+def lits_to_ranges(
+    literals: Iterator[int],
+) -> Tuple[Set[int], Set[Range]]:
+    """Split code points into isolated literals and contiguous ranges.
+
+    Runs of three or more consecutive values become a ``Range``; shorter
+    runs are returned as individual literals.
+    """
+    lits: Set[int] = set()
+    ranges: Set[Range] = set()
+    buf: List[int] = []
+    # The trailing None sentinel flushes the final run through the same
+    # rule as every other run (it used to turn a 2-long tail into a Range).
+    for lit in [*sorted(literals), None]:
+        if buf and (lit is None or buf[-1] != lit - 1):
+            if len(buf) < 3:
+                lits.update(buf)
+            else:
+                ranges.add(Range(buf[0], buf[-1]))
+            buf = []
+        if lit is not None:
+            buf.append(lit)
+    return lits, ranges
diff --git a/ozi_build/_redos.py b/ozi_build/_redos.py
new file mode 100644
index 0000000..2d47320
--- /dev/null
+++ b/ozi_build/_redos.py
@@ -0,0 +1,225 @@
+import logging
+from dataclasses import dataclass
+from typing import Iterator, List, Optional
+
+from ._at import EndOfString
+from ._branch import Branch
+from ._char import Character
+from ._repeat import InfiniteRepeat, Repeat
+from ._sequence import Sequence
+
+
+@dataclass(frozen=True)
+class Redos:
+    starriness: int
+    prefix_sequence: Sequence
+    redos_sequence: Sequence
+    repeated_character: Character
+    killer: Optional[Character]
+
+    @property
+    def example_prefix(self) -> str:
+        return self.prefix_sequence.example()
+
+    def example(self, js_flavour: bool = False) -> str:
+        """Return source code for an expression that builds an attack string.
+
+        The expression has the shape ``'prefix' + 'c' * 3456 + 'killer'``.
+        The prefix part is left out when the prefix is empty, the killer
+        part when there is no killer or the repeated character can act as
+        one, and ``js_flavour`` swaps ``* 3456`` for ``.repeat(3456)``.
+        """
+
+        def quoted(text: str) -> str:
+            # Escape for embedding in a single-quoted string literal.
+            escaped = text.encode("unicode_escape").decode()
+            return "'" + escaped.replace("'", "\\'") + "'"
+
+        repeated, killer = self.repeated_character, self.killer
+        # Try to find a repeating character which is also a killer
+        if killer and (killing_repeat := repeated & killer):
+            repeated, killer = killing_repeat, None
+
+        prefix = self.example_prefix
+        attack = quoted(repeated.example())
+        attack += ".repeat(3456)" if js_flavour else " * 3456"
+
+        # Join whichever of prefix / repetition / killer are present.
+        pieces = [quoted(prefix)] if prefix else []
+        pieces.append(attack)
+        if killer:
+            pieces.append(quoted(killer.example()))
+        return " + ".join(pieces)
+
+
+def find(sequence, flags: int = 0) -> List[Redos]:
+    """Return distinct Redos objects, most starry first, then shortest example_prefix.
+    The key is a tuple so a very long prefix cannot outrank starriness; ``flags`` is unused.
+    """
+    unique: List[Redos] = []
+    for finding in find_redos(sequence):
+        if finding not in unique:
+            unique.append(finding)
+    return sorted(unique, key=lambda r: (-r.starriness, len(r.example_prefix)))
+
+
+def expand_branches(seq: Sequence) -> Iterator[Sequence]:
+    """Yield one Sequence per choice of alternative at each top-level Branch.
+    This could blow up exponentially, but it's nicer for now to expand branches.
+    NOTE(review): the Repeat/(Sequence, Branch) case neither returns nor appends elem."""
+    head = []
+    for i, elem in enumerate(seq.elements):
+        if isinstance(elem, Branch):
+            for b in elem.get_branches():
+                head_plus_branch = head + (
+                    [] if not b else [b] if not isinstance(b, Sequence) else b.elements
+                )
+                for tail in expand_branches(Sequence(seq.elements[i + 1 :])):
+                    yield Sequence(head_plus_branch + tail.elements)
+            return  # All processing in yields
+        elif isinstance(elem, Repeat) and elem.starriness > 10:
+            logging.debug("Exponential: %s", elem)
+            if isinstance(elem.repeat, (Sequence, Branch)):
+                for tail in expand_branches(Sequence(seq.elements[i + 1 :])):
+                    yield Sequence(head + [elem] + tail.elements)
+                    for pseudo_repeat in elem.repeat.matching_repeats():
+                        logging.debug("Pseudo repeat %s", pseudo_repeat)
+                        yield Sequence(
+                            head + [elem.alter_repeat(pseudo_repeat)] + tail.elements
+                        )
+            else:
+                head.append(elem)
+        else:
+            head.append(elem)
+    yield Sequence(head)  # reached only when no top-level Branch was met
+
+
+def find_redos(sequence_with_branches) -> Iterator[Redos]:
+    """Yield the findings of every Sequence that expand_branches produces."""
+    logging.debug(sequence_with_branches)
+    root = sequence_with_branches
+    if not isinstance(root, Sequence):
+        root = Sequence([root])  # singleton like Branch (ab|cd)
+    for flat in expand_branches(root):
+        yield from find_redos_in_branchless_sequence(flat)
+
+
+def find_redos_in_branchless_sequence(seq: Sequence) -> Iterator[Redos]:
+    logging.debug(seq)
+    for start, elem in enumerate(seq.elements):  # TODO branches
+        repeated = isinstance(elem, InfiniteRepeat) and elem.overall_character_class()
+        if repeated:
+            yield from make_redos(seq, start, start + 1, repeated, elem.starriness)
+
+
+def make_redos(
+    seq: Sequence,
+    sequence_start: int,
+    continue_from: int,
+    repeated_character: Character,
+    starriness: int,
+) -> Iterator[Redos]:
+    """Yield Redos findings for the repeat at ``sequence_start``.
+
+    Walks forward from ``continue_from`` while ``repeated_character`` keeps
+    matching. A mandatory mismatch yields a finding without killer; if all
+    elements match, the walk back looks for a mandatory element to negate.
+    """
+    # TODO branches
+    character_history = [repeated_character]
+    logging.debug(
+        "Make ReDoS %d %d %s %d",
+        sequence_start,
+        continue_from,
+        repeated_character,
+        starriness,
+    )
+    for current_index in range(continue_from, len(seq)):
+        elem = seq.elements[current_index]
+
+        if isinstance(elem, EndOfString):
+            # May need to go back before the matching sequence to calculate $
+            elem.set_character(seq.elements[:current_index])
+
+        eoc = elem.overall_character_class()
+        new_c = repeated_character & eoc
+        logging.debug("%s & %s = %s (for %s)", repeated_character, eoc, new_c, elem)
+
+        # Handle optional elements
+        if elem.minimum_length == 0:
+            if elem.starriness:
+                # If we have a*, we branch and try with and without it
+                if new_c != repeated_character:
+                    # Only branch if we have [ab]a* : if we have aa* or a[ab]* then the character class doesn't change
+                    # Try without this element
+                    yield from make_redos(
+                        seq,
+                        sequence_start,
+                        current_index + 1,
+                        repeated_character,
+                        starriness,
+                    )
+            else:
+                # Don't care about finite repeats (abc)? or a{,4}, but still record
+                # an entry: the backward loop pops one per element, and skipping
+                # it here could end in a pop from an empty list.
+                character_history.append(repeated_character)
+                continue
+
+        if new_c is None:
+            # This element will force backtracking as it's incompatible with `repeated_character`
+            if elem.minimum_length and starriness > 2:
+                yield redos_found(
+                    seq,
+                    sequence_start,
+                    current_index,
+                    repeated_character,
+                    starriness,
+                    None,
+                )
+            return
+
+        starriness += elem.starriness
+        repeated_character = new_c
+        character_history.append(new_c)
+
+    # Everything matched! We need to work backwards and find a 'killer' to cause backtracking if we want ReDoS
+    logging.debug("Backtracking: %s", character_history)
+    for current_index in reversed(range(continue_from, len(seq))):
+        elem = seq.elements[current_index]
+        character_history.pop()
+        starriness -= elem.starriness
+        if starriness <= 2:
+            return
+        # Can't get backtracking by not matching optional groups
+        if elem.minimum_length > 0:
+            # Find a character which matches the sequence and then fails on the killer
+            if (match := elem.overall_character_class()) and (killer := match.negate()):
+                old_repeat = character_history.pop()
+                logging.debug(
+                    "%s (for %s): killer=%s, repeat=%s", match, elem, killer, old_repeat
+                )
+                yield redos_found(
+                    seq, sequence_start, current_index, old_repeat, starriness, killer
+                )
+                return
+    logging.debug("Backtracking: FAIL")
+
+
+def redos_found(
+    seq: Sequence,
+    start: int,
+    backtrack_at: int,
+    repeated_character: Character,
+    starriness: int,
+    killer: Optional[Character],
+) -> Redos:
+    """Package the finding: elements before ``start`` form the prefix, and
+    ``start`` through ``backtrack_at`` (inclusive) the vulnerable part."""
+    # TODO: Try to include some skipped optional parts (like `?`) just to make it nicer
+    logging.debug("ReDoS found")
+    stop = backtrack_at + 1
+    prefix, vulnerable = seq.elements[:start], seq.elements[start:stop]
+    return Redos(
+        starriness, Sequence(prefix), Sequence(vulnerable), repeated_character, killer
+    )
diff --git a/ozi_build/_repeat.py b/ozi_build/_repeat.py
new file mode 100644
index 0000000..9885cec
--- /dev/null
+++ b/ozi_build/_repeat.py
@@ -0,0 +1,71 @@
+from dataclasses import dataclass
+from typing import Any, Optional
+
+from ._char import Character
+
+
+@dataclass(frozen=True)
+class Repeat:
+    """An element together with the minimum number of times it must repeat."""
+
+    repeat: Any
+    minimum_repeats: int
+
+    def example(self) -> str:
+        count = self.minimum_repeats
+        # When nothing is mandatory the inner example() is not called at all
+        return self.repeat.example() * count if count != 0 else ""
+
+    @property
+    def minimum_length(self) -> int:
+        per_repeat = self.repeat.minimum_length
+        return self.minimum_repeats * per_repeat
+
+    @property
+    def starriness(self) -> int:
+        # ? and {1,30} are not that starry
+        return self.repeat.starriness
+
+    def exact_character_class(self) -> Optional[Character]:
+        """Repeated character e.g. [bc] for [bc]*, or [a] for (aaa)*"""
+        return self.repeat.exact_character_class()
+
+    def overall_character_class(self) -> Optional[Character]:
+        """(23)+ -> None, (22)* -> 2"""
+        return self.repeat.overall_character_class()
+
+    def maximal_character_class(self) -> Character:
+        """
+        (23)+ -> [23], (22)* -> 2, (23*)* -> [23]
+        Helps to find a way to kill a sequence like a(bc*)*$
+        """
+        return self.repeat.maximal_character_class()
+
+
+@dataclass(frozen=True)
+class InfiniteRepeat(Repeat):
+    forced_starriness: Optional[int] = None  # when set, replaces the computed starriness
+
+    @property
+    def starriness(self) -> int:
+        # a*a*a* is cubic whereas (a*)* is exponential but here we just call it 10
+        if self.forced_starriness is None:
+            return 1 + self.repeat.starriness * 10
+        return self.forced_starriness
+
+    def __repr__(self) -> str:
+        return f"{self.repeat}{{{self.minimum_repeats}+}}"
+
+    def alter_repeat(self, repeat) -> "InfiniteRepeat":
+        return InfiniteRepeat(repeat, minimum_repeats=self.minimum_repeats)  # override not kept
+
+
+@dataclass(frozen=True)
+class FiniteRepeat(Repeat):
+    maximum_repeats: int  # upper bound of the quantifier
+
+    def __repr__(self) -> str:  # renders as <inner>{min,max}
+        return f"{self.repeat}{{{self.minimum_repeats},{self.maximum_repeats}}}"
+
+    def alter_repeat(self, repeat) -> "FiniteRepeat":  # same bounds, new inner element
+        return FiniteRepeat(repeat, self.minimum_repeats, self.maximum_repeats)
diff --git a/ozi_build/_sequence.py b/ozi_build/_sequence.py
new file mode 100644
index 0000000..76b2d53
--- /dev/null
+++ b/ozi_build/_sequence.py
@@ -0,0 +1,115 @@
+from dataclasses import dataclass
+from typing import List, Optional
+
+from ._char import Character
+from ._repeat import InfiniteRepeat
+
+
+@dataclass(frozen=True)
+class Sequence:
+    elements: List  # ordered sub-elements, duck-typed (starriness, example(), ...)
+
+    @property
+    def starriness(self):
+        return sum(e.starriness for e in self.elements)
+
+    def __len__(self):
+        return len(self.elements)
+
+    def example(self) -> str:
+        return "".join(e.example() for e in self.elements)
+
+    @property
+    def minimum_length(self) -> int:
+        """Sum of the minimum lengths of all elements."""
+        return sum(e.minimum_length for e in self.elements)
+
+    def exact_character_class(self) -> Optional[Character]:
+        """
+        aa*a -> a, abc -> None, [ab][abc] -> None
+
+        Returns the class matched exactly by every element, or None if any differs.
+        """
+        first = self.elements[0].exact_character_class()
+        if first is None:
+            return None
+        # Compare each element's exact class (not the element object itself) with the first
+        if any(e.exact_character_class() != first for e in self.elements[1:]):
+            return None
+        return first
+
+    def overall_character_class(self) -> Optional[Character]:
+        """
+        aa*a -> a, abc -> None, [ab][abc] -> [ab]
+        a?b -> b, a+b -> None, [ab]+b* -> b
+        """
+        c = Character.ANY()
+        for e in self.elements:
+            c &= e.overall_character_class()
+            if not c:
+                return None
+        return c
+
+    def matching_repeats(self):
+        """Yield one InfiniteRepeat per candidate character class, with its summed starriness"""
+        c = Character.ANY()
+        has_mandatory = False
+        optionals = []
+        starriness = 0
+        minimum_length = 0
+        for e in self.elements:
+            if e.minimum_length:
+                c &= e.overall_character_class()
+                if not c:
+                    return None  # this is a generator: ends it without yielding anything
+                has_mandatory = True
+                starriness += e.starriness
+                minimum_length += e.minimum_length
+            elif e.starriness > 0:
+                optionals.append(e)
+        possibilities = {c: starriness} if has_mandatory else {}
+        for e in optionals:
+            if new_c := e.overall_character_class() & c:
+                if new_c in possibilities:
+                    possibilities[new_c] += e.starriness
+                else:
+                    possibilities[new_c] = e.starriness
+
+        if len(possibilities) > 1:
+            # (a*[ab]*a*[bc]*[bcd]*.+a*)*@ has classes {.: 1, [a]: 5, [[a-b]]: 2, [[b-c]]: 3, [[b-d]]: 2, [b]: 3}
+            # This could blow up!
+            poss_chars = list(possibilities.items())
+            merged_chars = {}
+            while poss_chars:
+                c_a, s_a = poss_chars.pop()
+                for c_b, s_b in poss_chars:
+                    if (merged := c_a & c_b) is not None:
+                        if merged == c_a:
+                            possibilities[c_a] += s_b
+                        elif merged == c_b:
+                            possibilities[c_b] += s_a
+                        else:
+                            if merged not in merged_chars:
+                                merged_chars[merged] = set()
+                            merged_chars[merged] |= {(c_a, s_a), (c_b, s_b)}
+            for merged, set_of_chars in merged_chars.items():
+                possibilities[merged] = sum(s for _, s in set_of_chars)
+
+        for cc, s in possibilities.items():
+            if s:
+                yield InfiniteRepeat(cc, minimum_length, forced_starriness=s)
+
+    def maximal_character_class(self) -> Character:
+        """
+        Only useful when this Sequence is inside a Repeat
+        a*b -> [ab], ab* -> [ab]
+        Union of the elements' maximal classes, e.g. to force backtracking for (bc*)$
+        """
+        c = None
+        for e in self.elements:
+            if (mcc := e.maximal_character_class()) is not None:
+                c = mcc | c
+        return c
+
+    def __repr__(self) -> str:
+        return "SEQ{ " + " ".join(str(e) for e in self.elements) + " }"
diff --git a/ozi_build/_sre.py b/ozi_build/_sre.py
new file mode 100644
index 0000000..b27bc9f
--- /dev/null
+++ b/ozi_build/_sre.py
@@ -0,0 +1,205 @@
+import sre_constants
+import sre_parse
+from typing import List, Optional, Set, Tuple, Union  # noqa: I100, I201
+
+from ._at import EndOfString
+from ._branch import Branch, make_branch
+from ._categories import Category, covers_any
+from ._char import Character
+from ._groupref import subpattern_to_groupref
+from ._repeat import FiniteRepeat, InfiniteRepeat
+from ._sequence import Sequence
+
+SreConstant = sre_constants._NamedIntConstant
+SreOpData = Union[Tuple, List, int, SreConstant, None]
+SreOp = Tuple[SreConstant, SreOpData]
+
+
+class SreOpParser:
+    """Convert `sre_parse` output into the AST classes; one `from_<OP>` method per opcode."""
+
+    def __init__(self):
+        self._groups = {}
+        self.negative_lookahead: Optional[Character] = None
+
+    def parse_sre(self, pattern: str, flags: int = 0):
+        return self.sequence_or_singleton(sre_parse.parse(pattern, flags))
+
+    def parse_op(self, op: SreConstant, data: SreOpData):
+        return getattr(self, f"from_{op.name}")(data)
+
+    def sequence_or_singleton(self, ops: List[SreOp]):
+        elems = []
+        for p in (self.parse_op(*op) for op in ops):
+            if p is not None:
+                if isinstance(p, Sequence):
+                    elems.extend(p.elements)
+                else:
+                    elems.append(p)
+        if len(elems) == 0:
+            return None
+        if len(elems) == 1:
+            return elems[0]
+        return Sequence(elems)
+
+    def from_SUBPATTERN(self, data: Tuple[int, int, int, List[SreOp]]):
+        ref = data[0]
+        elements = data[3]
+        result = self.sequence_or_singleton(elements)
+        self._groups[ref] = result
+        return result
+
+    def from_MAX_REPEAT(
+        self, data: Tuple[int, Union[int, SreConstant], List[SreOp]]
+    ) -> Union[FiniteRepeat, InfiniteRepeat, Branch, Sequence, None]:
+        """Build a repeat node; a pending negative lookahead narrows the first repetition."""
+        minimum, maximum, elements = data
+        infinite = maximum is sre_constants.MAXREPEAT
+        # TODO support negative lookahead before repeat with minimum = 0
+        negative_lookahead = self.use_negative_lookahead()
+        repeatable = self.sequence_or_singleton(elements)
+        if repeatable is None:
+            return None
+        if (
+            minimum == 0
+            and maximum == 1
+            and repeatable.starriness
+            and not repeatable.overall_character_class()
+        ):
+            # Interesting (starry) optional sequences as branches (ab*)? -> (ab*|)
+            return make_branch([repeatable, None])
+        if infinite:
+            if (
+                negative_lookahead is not None
+                and minimum > 0
+                and isinstance(repeatable, Character)
+            ):
+                first = negative_lookahead & repeatable
+                return Sequence([first, InfiniteRepeat(repeatable, minimum - 1)])
+            return InfiniteRepeat(repeatable, minimum)
+        if (
+            negative_lookahead is not None
+            and minimum > 0
+            and maximum > 1
+            and isinstance(repeatable, Character)
+        ):
+            first = negative_lookahead & repeatable
+            return Sequence([first, FiniteRepeat(repeatable, minimum - 1, maximum - 1)])
+        return FiniteRepeat(repeatable, minimum, maximum)
+
+    def from_MIN_REPEAT(self, data):
+        return self.from_MAX_REPEAT(data)
+
+    def from_POSSESSIVE_REPEAT(self, data):
+        # Python 3.11+ emits this for `x*+` / `x++`; its data has the MAX_REPEAT layout.
+        # Handling it as greedy over-approximates backtracking instead of raising AttributeError.
+        return self.from_MAX_REPEAT(data)
+
+    def from_ATOMIC_GROUP(self, data):
+        # Python 3.11+ `(?>...)`: data is the inner pattern itself (no group number or flags).
+        return self.sequence_or_singleton(data)
+
+    def from_BRANCH(
+        self, data: Tuple[None, List[List[SreOp]]]
+    ) -> Union[Branch, FiniteRepeat, Character, None]:
+        # sre already transforms (a|b|c) -> [abc]
+        branches = data[1]
+        negative_lookahead = self.use_negative_lookahead()
+        processed_branches = []
+        for branch in branches:
+            self.negative_lookahead = negative_lookahead
+            processed_branches.append(self.sequence_or_singleton(branch))
+        self.negative_lookahead = None
+        return make_branch(processed_branches)
+
+    def from_AT(self, at: SreConstant):
+        # TODO: handling for multiline
+        # TODO: handling for \\b
+        self.use_negative_lookahead()
+        if at is sre_constants.AT_END:
+            return EndOfString()
+        return None
+
+    def from_ANY(self, _: None) -> Character:
+        if negative_lookahead := self.use_negative_lookahead():
+            return negative_lookahead
+        return Character.ANY()
+
+    def from_LITERAL(self, literal: int) -> Character:
+        if negative_lookahead := self.use_negative_lookahead():
+            return Character.LITERAL(literal) & negative_lookahead
+        return Character.LITERAL(literal)
+
+    def from_NOT_LITERAL(self, not_literal: int) -> Character:
+        if negative_lookahead := self.use_negative_lookahead():
+            return (
+                Character(literals={not_literal}, positive=False) & negative_lookahead
+            )
+        return Character(literals={not_literal}, positive=False)
+
+    def from_IN(self, data: List[SreOp]) -> Optional[Character]:
+        literals: Optional[Set[int]] = None
+        categories: Optional[Set] = None
+        positive = True
+        if len(data) > 1 and data[0] == (sre_constants.NEGATE, None):
+            positive = False
+            data = data[1:]
+        for in_op, in_data in data:
+            if in_op is sre_constants.LITERAL:
+                if literals is None:
+                    literals = set()
+                literals.add(in_data)
+            elif in_op is sre_constants.RANGE:
+                if literals is None:
+                    literals = set()
+                min_val, max_val = in_data
+                literals.update(range(min_val, max_val + 1))
+            elif in_op is sre_constants.CATEGORY:
+                if categories is None:
+                    categories = set()
+                categories.add(Category[in_data.name[9:]])
+
+        if categories and covers_any(categories):
+            return self.from_ANY(None) if positive else None
+        if negative_lookahead := self.use_negative_lookahead():
+            return Character(literals, categories, positive) & negative_lookahead
+        return Character(literals, categories, positive)
+
+    def from_GROUPREF(self, ref: int):
+        return subpattern_to_groupref(self._groups.get(ref))
+
+    @staticmethod
+    def from_GROUPREF_EXISTS(_) -> None:
+        return None  # No intention to implement this properly
+
+    @staticmethod
+    def from_ASSERT(_) -> None:
+        return None  # No intention to implement this properly
+
+    def from_ASSERT_NOT(self, data) -> None:
+        typ, ops = data
+        if typ == 1:
+            if len(ops) == 1:
+                character_op = ops[0]
+                if character_op[0] in (
+                    sre_constants.LITERAL,
+                    sre_constants.NOT_LITERAL,
+                    sre_constants.IN,
+                ):
+                    negative_lookahead = self.use_negative_lookahead()
+                    not_assertion = self.parse_op(*character_op)
+                    if not_assertion and (assertion := not_assertion.negate()):
+                        self.negative_lookahead = assertion
+                        if negative_lookahead is not None:
+                            self.negative_lookahead &= negative_lookahead
+                    else:
+                        self.negative_lookahead = negative_lookahead
+
+        return None  # No intention to implement this fully
+
+    def use_negative_lookahead(self) -> Optional[Character]:
+        if self.negative_lookahead is not None:
+            negative_lookahead = self.negative_lookahead
+            self.negative_lookahead = None
+            return negative_lookahead
+        return None
diff --git a/ozi_build/_text.py b/ozi_build/_text.py
new file mode 100644
index 0000000..bdef2e9
--- /dev/null
+++ b/ozi_build/_text.py
@@ -0,0 +1,51 @@
+POLYNOMIAL_DEGREES = [  # looked up as POLYNOMIAL_DEGREES[starriness - 1] for starriness 1..10
+    "linear",
+    "quadratic",
+    "cubic",
+    "quartic",
+    "quintic",
+    "sextic",
+    "septic",
+    "octic",
+    "nonic",
+    "decic",
+]
+
+
+class TextOutput:
+    def __init__(self, js_flavour: bool = False):
+        self.js_flavour = js_flavour
+        self.regexes = 0
+        self.first_for_regex = True
+
+    def next(self):
+        """Move on to a new regex: count it and re-arm the per-regex header."""
+        self.regexes += 1
+        self.first_for_regex = True
+
+    def record(self, redos, pattern, *, filename=None, lineno=None, context=None):
+        """Print one finding; the pattern header is printed only for a regex's first finding."""
+        if self.first_for_regex:
+            if filename and lineno is not None:
+                print(f"Vulnerable regex in {filename} #{lineno}")
+            elif filename:
+                print(f"Vulnerable regex in {filename}")
+            print(f"Pattern: {pattern}")
+            if context:
+                print(f"Context: {context}")
+            print("---")
+            self.first_for_regex = False
+        print(redos)
+        level = redos.starriness
+        stars = "\u2b50" * min(10, level)
+        if level > 10:
+            degree = "exponential"
+        elif level > 0:
+            degree = POLYNOMIAL_DEGREES[level - 1]
+        else:
+            degree = "?"
+        print(f"Worst-case complexity: {redos.starriness} {stars} ({degree})")
+        print(f"Repeated character: {redos.repeated_character}")
+        if redos.killer:
+            print(f"Final character to cause backtracking: {redos.killer}")
+        print(f"Example: {redos.example(self.js_flavour)}\n")
diff --git a/ozi_build/_util.py b/ozi_build/_util.py
index 9f80aa2..9bcdee8 100644
--- a/ozi_build/_util.py
+++ b/ozi_build/_util.py
@@ -2,11 +2,62 @@
 import os
 import subprocess
 import sys
+from fileinput import filename
+import re
 
 from .pep425tags import get_abbr_impl
 from .pep425tags import get_abi_tag
 from .pep425tags import get_impl_ver
 from .pep425tags import get_platform_tag
+from ._text import TextOutput
+from ._redos import find
+from ._sre import SreOpParser
+
+
+class PotentialRedos(RuntimeError):
+    """Raised by TomlWalker.handle for a regex finding whose starriness exceeds 2."""
+
+
+def handle_file(tomldata, filename: str, output: TextOutput):  # entry point for one parsed file
+    if isinstance(tomldata, (list, dict)):  # any other top-level value is silently ignored
+        TomlWalker(filename, output).handle(tomldata)
+
+
+class TomlWalker:
+    """Recursively scan parsed TOML data and fail on the first string that is a risky regex."""
+
+    def __init__(self, filename: str, output: TextOutput):
+        self.filename = filename
+        self.output = output
+
+    def handle(self, elem):
+        """Check `elem` (str, list or dict; anything else is ignored).
+
+        Raises PotentialRedos(redos, pattern, filename) for a finding with starriness > 2.
+        """
+        if isinstance(elem, str) and len(elem) > 5:
+            try:
+                parsed = SreOpParser().parse_sre(elem)
+            except re.error:
+                return  # We will have many strings which aren't actually regexes
+            self.output.next()
+            for redos in find(parsed):
+                if redos.starriness > 2:
+                    self.output.record(redos, elem, filename=self.filename)
+                    # Bare `filename` is the function imported from `fileinput`, not this file
+                    raise PotentialRedos(redos, elem, self.filename)
+        elif isinstance(elem, list):
+            for _elem in elem:
+                self.handle(_elem)
+        elif isinstance(elem, dict):
+            for _elem in elem.values():
+                self.handle(_elem)
+
+
+def check_pyproject_regexes(file):
+    """Scan parsed pyproject data for risky regexes, reporting them as 'pyproject.toml'."""
+    handle_file(file, 'pyproject.toml', TextOutput())
+
 
 PKG_INFO = """\
 Metadata-Version: 2.2
diff --git a/ozi_build/config.py b/ozi_build/config.py
index b65f98b..24ebe6b 100644
--- a/ozi_build/config.py
+++ b/ozi_build/config.py
@@ -3,6 +3,7 @@
 import os
 import sys
 
+from ._util import check_pyproject_regexes
 from .metadata import auto_python_version
 from .metadata import check_pkg_info_file
 from .metadata import check_requires_python
@@ -27,6 +28,7 @@
 class Config:
     def __init__(self, builddir=None):
         config = self.__get_config()
+        check_pyproject_regexes(config)
         self.__metadata = config['tool']['ozi-build']['metadata']
         self.__entry_points = config['tool']['ozi-build'].get(
             'entry-points', []
diff --git a/ozi_build/meson.build b/ozi_build/meson.build
index c185eeb..b36640a 100644
--- a/ozi_build/meson.build
+++ b/ozi_build/meson.build
@@ -1,12 +1,24 @@
 sources = [
     '__init__.py',
-    '_pyc_wheel.py',
     '_util.py',
+    '_at.py',
+    '_branch.py',
+    '_categories.py',
+    '_char.py',
+    '_files.py',
+    '_groupref.py',
+    '_ranges.py',
+    '_redos.py',
+    '_repeat.py',
+    '_sequence.py',
+    '_sre.py',
+    '_text.py',
     'buildapi.py',
     'config.py',
     'metadata.py',
     'pep425tags.py',
     'schema.py',
+    '_pyc_wheel.py',
 ]
 foreach source: sources
     fs.copyfile(source)
@@ -16,3 +28,4 @@ foreach source: sources
         subdir : 'ozi_build'
     )
 endforeach
+