Skip to content

Commit

Permalink
1.8.0: add regexploit
Browse files Browse the repository at this point in the history
Signed-off-by: rjdbcm <[email protected]>
  • Loading branch information
rjdbcm committed Dec 10, 2024
1 parent 6c83218 commit 5dea24f
Show file tree
Hide file tree
Showing 17 changed files with 1,319 additions and 5 deletions.
23 changes: 20 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,38 @@ OZI.build

This is the `OZI-Project <https://github.com/OZI-Project>`_ maintained fork of the mesonpep517 0.2 tag.

This is a simple module that implements pep517 for the meson build system.
This is a module that implements PEP-517 for the meson build system.

This means that you only need to provide a ``pyproject.toml`` in your project
source root to be able to publish your project built with meson on PyPI
and to create a wheel for the project.

Other features include:

* compiling modules to bytecode with pyc_wheel
* scanning ``pyproject.toml`` for exploitable ReDoS patterns with regexploit

For more information, have a look at `the documentation <https://docs.oziproject.dev/en/stable/ozi_build.html>`_.

OZI.build is licensed under Apache-2.0 and includes ``pyc_wheel`` and
portions of ``wheel`` whose copyright information is reproduced here.
License
-------

OZI.build is licensed under Apache-2.0 and includes ``regexploit``,
``pyc_wheel`` and portions of ``wheel`` whose copyright information is
reproduced here.

Apache-2.0 contributors
^^^^^^^^^^^^^^^^^^^^^^^

``regexploit`` Copyright (c) 2021 Ben Caller <[email protected]>

``pyc_wheel`` Copyright (c) 2016 Grant Patten <[email protected]>

``pyc_wheel`` Copyright (c) 2019-2021 Adam Karpierz <[email protected]>

MIT contributors
^^^^^^^^^^^^^^^^

``wheel`` Copyright (c) 2012-2014 Daniel Holth <[email protected]> and contributors.

Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
project('OZI.build', version : '1.7.2', license : 'apache-2.0')
project('OZI.build', version : '1.8.0', license : 'apache-2.0')
fs = import('fs')
python = import('python').find_installation()
subdir('ozi_build')
Expand Down
58 changes: 58 additions & 0 deletions ozi_build/_at.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from dataclasses import dataclass
from typing import List, Optional

from ._char import Character
from ._repeat import InfiniteRepeat, Repeat


@dataclass
class EndOfString:
    """End-of-string anchor (rendered as ``$``) in the parsed regex tree.

    ``character`` holds the character class associated with the anchor; it
    starts as ``None`` and is recomputed by :meth:`set_character`.
    """

    character: Optional[Character] = None

    @property
    def starriness(self):
        """Starriness of the anchor: always 0."""
        return 0

    @property
    def minimum_length(self):
        """Always 1; the value is not really meaningful for an anchor."""
        return 1

    def overall_character_class(self):
        """Return the character class currently stored on the anchor."""
        return self.character

    def __repr__(self) -> str:
        return f"${self.character}"

    def __and__(self, other: Character) -> Optional[Character]:
        """Combine ``other`` with the stored character (``other & character``)."""
        return other & self.character

    def example(self):
        """Return an example string for the anchor (approximately a newline)."""
        return "\n"

    def set_character(self, previous_elems: List):
        """Recompute ``character`` from the elements that precede the anchor.

        To force backtracking, the dollar must not match any of the previous
        groups, walking backwards until a mandatory group is reached. This
        could perhaps be made more lenient.

        To cause backtracking on a long string of a's:
            a*a*a*$         -> Any [^a]
            [ab]+a*a*a*$    -> Any [^ab] (baaaaaaaaaaaab does not backtrack)
            b+a*a*a*$       -> Any [^a]
            .a*a*a*$        -> Any [^a]
            .+a*a*a*$       -> Cannot backtrack: everything is matched by .+
        """
        self.character = None
        for elem in reversed(previous_elems):
            mandatory = elem.minimum_length > 0
            if mandatory and not isinstance(elem, InfiniteRepeat):
                # xa*[ab]*a*$ -> [ab]
                return

            if isinstance(elem, Repeat):
                char_class = elem.maximal_character_class()
            else:
                char_class = elem.overall_character_class()
            if not char_class:
                continue

            if mandatory and (self.character & char_class) != self.character:
                # char_class is smaller than self.character
                # (i.e. char_class is not an ANY)
                # x+a*[ab]*a*$ -> [ab]
                return
            self.character |= char_class
81 changes: 81 additions & 0 deletions ozi_build/_branch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from dataclasses import dataclass
from typing import Iterator, List, Optional

from ._at import EndOfString
from ._char import Character
from ._repeat import FiniteRepeat, InfiniteRepeat
from ._sequence import Sequence


@dataclass(frozen=True)
class Branch:
    """Alternation node: a list of alternative sub-patterns, optionally empty.

    ``branches`` holds the alternatives; ``optional`` marks that the empty
    alternative is also allowed (rendered with a trailing ``?``).
    """

    branches: List
    optional: bool = False

    def get_branches(self) -> Iterator:
        """Yield every alternative, followed by ``None`` when optional."""
        yield from self.branches
        if self.optional:
            yield None

    @property
    def starriness(self) -> int:
        """Largest starriness found among the alternatives."""
        return max(alt.starriness for alt in self.branches)

    @property
    def minimum_length(self) -> int:
        """Shortest minimum length of any alternative, or 0 when optional."""
        if self.optional:
            return 0
        return min(alt.minimum_length for alt in self.branches)

    def overall_character_class(self) -> Optional[Character]:
        """Intersect the overall character classes of all alternatives.

        Starts from ``Character.ANY()`` and returns ``None`` as soon as the
        running intersection becomes ``None``.
        """
        common = Character.ANY()
        for alt in self.branches:
            common &= alt.overall_character_class()
            if common is None:
                return None
        return common

    def maximal_character_class(self):
        """Always ``None`` (the original author marks this as questionable)."""
        return None

    def example(self) -> str:
        """Return the empty string when optional, else the first alternative's example."""
        return "" if self.optional else self.branches[0].example()

    def __len__(self) -> int:
        return len(self.branches) + int(self.optional)

    def __repr__(self) -> str:
        middle = " | ".join(map(str, self.branches))
        suffix = "?" if self.optional else ""
        return f"BR( {middle} ){suffix}"

    def matching_repeats(self):
        """Yield infinite repeats found directly in, or inside sequences of, the alternatives."""
        for alt in self.branches:
            if not alt.starriness > 0:
                continue
            if isinstance(alt, InfiniteRepeat):
                yield alt
            elif isinstance(alt, Sequence):
                yield from alt.matching_repeats()


def make_branch(branches: List):
    """Build the simplest node that represents an alternation of ``branches``.

    * A single alternative is returned unchanged.
    * Falsy alternatives and ``EndOfString`` instances are dropped; if nothing
      remains, ``None`` is returned.
    * Dropping at least one alternative makes the result optional:
      ``(ab|cd|) -> (ab|cd)?``.
    * When every remaining alternative is a ``Character`` they are merged into
      one class: ``(a|b) -> [ab]``, ``(a|b|) -> [ab]?``.
    * Otherwise a ``Branch`` is returned.
    """
    if len(branches) == 1:
        return branches[0]

    kept = []
    for candidate in branches:
        if not candidate:
            continue
        if isinstance(candidate, EndOfString):
            continue
        kept.append(candidate)
    if not kept:
        return None

    # Something was dropped, so the empty alternative is possible.
    optional = len(kept) < len(branches)

    if not all(isinstance(alt, Character) for alt in kept):
        return Branch(kept, optional)

    # Union the single-character alternatives, starting from None.
    merged = None
    for alt in kept:
        merged |= alt
    return FiniteRepeat(merged, 0, 1) if optional else merged
93 changes: 93 additions & 0 deletions ozi_build/_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import sys
import unicodedata
from enum import Enum, auto
from typing import Set


class Category(Enum):
    """Character categories in positive/negated pairs (digit, word, space)."""

    DIGIT = auto()
    NOT_DIGIT = auto()
    WORD = auto()
    NOT_WORD = auto()
    SPACE = auto()
    NOT_SPACE = auto()

    @property
    def is_positive(self) -> bool:
        """True for members whose name does not carry the ``NOT_`` prefix."""
        return self.name[:4] != "NOT_"

    def negate(self) -> "Category":
        """Return the opposite member of the pair (``DIGIT`` <-> ``NOT_DIGIT``)."""
        flipped = f"NOT_{self.name}" if self.is_positive else self.name[4:]
        return Category[flipped]

    def example(self) -> str:
        """Return a sample character for this category from ``EXAMPLE_FOR_CAT``."""
        return EXAMPLE_FOR_CAT[self]

    def contains(self, literal: int) -> bool:
        """Report whether the code point ``literal`` belongs to this category.

        Membership is decided from the Unicode general category of the
        character; negated members return the inverse of their positive twin.
        """
        char = chr(literal)
        unicat = unicodedata.category(char)
        positive = self if self.is_positive else self.negate()

        if positive is Category.DIGIT:
            hit = unicat == "Nd"
        elif positive is Category.WORD:
            # underscore is a word character
            hit = unicat[0] == "L" or unicat == "Nd" or literal == 0x5F
        else:
            # Only SPACE remains among the positive members.
            hit = unicat == "Zs" or char in (" ", "\n", "\t", "\r", "\f", "\v")

        return hit if self.is_positive else not hit


# Optional cache of pre-computed code points per category, consulted first by
# list_category(). It is created empty here.
CATS = {}


def list_category(category, full_unicode: bool = False):
    """Yield, in ascending order, every code point that belongs to ``category``.

    Args:
        category: The ``Category`` member to enumerate.
        full_unicode: When true, scan every code point up to
            ``sys.maxunicode``; otherwise only code points 0-255.

    Yields:
        int: Code points for which ``category.contains`` is true.

    If ``CATS`` holds a non-empty entry for ``category``, that entry is
    yielded and the generator stops. Previously the generator fell through
    after a cache hit and yielded every matching code point a second time.
    """
    cached = CATS.get(category)
    if cached:
        yield from cached
        return  # a cache hit must not fall through to the scan below

    upper_bound = (sys.maxunicode + 1) if full_unicode else 256
    for data in range(upper_bound):
        # Single source of truth for membership: Category.contains.
        if category.contains(data):
            yield data


def covers_any(categories: Set[Category]) -> bool:
    """Return True when ``categories`` holds both a category and its negation.

    Such a pair (for example ``DIGIT`` together with ``NOT_DIGIT``) matches
    every character between them.
    """
    return any(
        cat.is_positive and cat.negate() in categories for cat in categories
    )


# NOTE(review): the three assignments below are commented out, so nothing in
# this module pre-populates CATS. They are also keyed by sre_parse constants
# rather than Category members -- presumably left over from an earlier
# version; confirm before re-enabling.
# CATS[sre_parse.CATEGORY_DIGIT] = list(list_category(sre_parse.CATEGORY_DIGIT))
# CATS[sre_parse.CATEGORY_SPACE] = list(list_category(sre_parse.CATEGORY_SPACE))
# CATS[sre_parse.CATEGORY_WORD] = list(list_category(sre_parse.CATEGORY_WORD))

# One representative character per category, returned by Category.example().
EXAMPLE_FOR_CAT = {
Category.DIGIT: "4",
Category.NOT_DIGIT: "!",
Category.WORD: "w",
Category.NOT_WORD: "$",
Category.SPACE: " ",
Category.NOT_SPACE: ".",
}
Loading

0 comments on commit 5dea24f

Please sign in to comment.