-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: rjdbcm <[email protected]>
- Loading branch information
Showing
17 changed files
with
1,319 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,21 +4,38 @@ OZI.build | |
|
||
This is the `OZI-Project <https://github.com/OZI-Project>`_ maintained fork of the mesonpep517 0.2 tag. | ||
|
||
This is a simple module that implements pep517 for the meson build system. | ||
This is a module that implements PEP-517 for the meson build system. | ||
|
||
This means that you only need to provide a ``pyproject.toml`` in your project | ||
source root to be able to publish your project built with meson on PyPI | ||
and to create a wheel for the project. | ||
|
||
Other features include: | ||
|
||
* compiling modules to bytecode with pyc_wheel | ||
* scanning ``pyproject.toml`` for exploitable ReDoS patterns with regexploit | ||
|
||
For more information, see `the documentation <https://docs.oziproject.dev/en/stable/ozi_build.html>`_. | ||
|
||
OZI.build is licensed under Apache-2.0 and includes ``pyc_wheel`` and | ||
portions of ``wheel`` whose copyright information is reproduced here. | ||
License | ||
------- | ||
|
||
OZI.build is licensed under Apache-2.0 and includes ``regexploit``, | ||
``pyc_wheel`` and portions of ``wheel`` whose copyright information is | ||
reproduced here. | ||
|
||
Apache-2.0 contributors | ||
^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
``regexploit`` Copyright (c) 2021 Ben Caller <[email protected]> | ||
|
||
``pyc_wheel`` Copyright (c) 2016 Grant Patten <[email protected]> | ||
|
||
``pyc_wheel`` Copyright (c) 2019-2021 Adam Karpierz <[email protected]> | ||
|
||
MIT contributors | ||
^^^^^^^^^^^^^^^^ | ||
|
||
``wheel`` Copyright (c) 2012-2014 Daniel Holth <[email protected]> and contributors. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from dataclasses import dataclass | ||
from typing import List, Optional | ||
|
||
from ._char import Character | ||
from ._repeat import InfiniteRepeat, Repeat | ||
|
||
|
||
@dataclass
class EndOfString:
    """End-of-string anchor element, rendered as ``$`` in ``repr``.

    ``character`` is the character class associated with the anchor. It
    defaults to ``None`` and is recomputed by :meth:`set_character`.
    """

    character: Optional[Character] = None

    @property
    def starriness(self):
        """Always 0 for this element."""
        return 0

    @property
    def minimum_length(self):
        """Always 1; the value has no real meaning for an anchor."""
        return 1  # Meaningless really here

    def overall_character_class(self):
        """Return the stored ``character`` (may be ``None``)."""
        return self.character

    def __repr__(self) -> str:
        return f"${self.character}"

    def __and__(self, other: Character) -> Optional[Character]:
        """Return ``other & self.character``."""
        return other & self.character

    def example(self):
        """Return a newline as an approximate example."""
        return "\n"  # ish

    def set_character(self, previous_elems: List):
        """Recompute ``character`` from the elements preceding the anchor.

        To force backtracking, the dollar will have to not match any
        previous groups until a mandatory group. This can perhaps be made
        more lenient.

        To cause backtracking on a long string of a's:
            a*a*a*$ -> Any [^a]
            [ab]+a*a*a*$ -> Any [^ab] (baaaaaaaaaaaab does not backtrack)
            b+a*a*a*$ -> Any [^a]
            .a*a*a*$ -> Any [^a]
            .+a*a*a*$ -> Cannot backtrack because everything gets matched by .+ :(

        The elements are walked from last to first, OR-ing each element's
        character class into ``self.character`` until a stopping element
        is reached.
        """
        self.character = None
        for element in reversed(previous_elems):
            mandatory = element.minimum_length > 0
            if mandatory and not isinstance(element, InfiniteRepeat):
                # A mandatory, non-infinite element ends the walk.
                return  # xa*[ab]*a*$ -> [ab]

            if isinstance(element, Repeat):
                char_class = element.maximal_character_class()
            else:
                char_class = element.overall_character_class()
            if not char_class:
                continue

            if mandatory and (self.character & char_class) != self.character:
                # char_class is smaller than self.character
                # (i.e. char_class is not an ANY)
                # x+a*[ab]*a*$ -> [ab]
                return
            self.character |= char_class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from dataclasses import dataclass | ||
from typing import Iterator, List, Optional | ||
|
||
from ._at import EndOfString | ||
from ._char import Character | ||
from ._repeat import FiniteRepeat, InfiniteRepeat | ||
from ._sequence import Sequence | ||
|
||
|
||
@dataclass(frozen=True)
class Branch:
    """Alternation between several sub-patterns.

    ``branches`` holds the alternatives; ``optional`` marks that the empty
    alternative is also allowed (shown as a trailing ``?`` in ``repr``).
    """

    branches: List
    optional: bool = False

    def get_branches(self) -> Iterator:
        """Yield each alternative, then ``None`` if the branch is optional."""
        yield from self.branches
        if self.optional:
            yield None

    @property
    def starriness(self) -> int:
        """Largest ``starriness`` among the alternatives."""
        return max(branch.starriness for branch in self.branches)

    @property
    def minimum_length(self) -> int:
        """0 when optional, else the smallest alternative ``minimum_length``."""
        if self.optional:
            return 0
        return min(branch.minimum_length for branch in self.branches)

    def overall_character_class(self) -> Optional[Character]:
        """Intersect the alternatives' overall character classes.

        Starts from ``Character.ANY()`` and returns ``None`` as soon as the
        running intersection becomes ``None``.
        """
        common = Character.ANY()
        for branch in self.branches:
            common &= branch.overall_character_class()
            if common is None:
                return None
        return common

    def maximal_character_class(self):
        """Always ``None``."""
        return None  # Really?

    def example(self) -> str:
        """Return ``""`` when optional, else the first alternative's example."""
        return "" if self.optional else self.branches[0].example()

    def __len__(self) -> int:
        """Number of alternatives, counting the empty one when optional."""
        empty_alternative = int(self.optional)
        return len(self.branches) + empty_alternative

    def __repr__(self) -> str:
        alternatives = " | ".join(map(str, self.branches))
        suffix = "?" if self.optional else ""
        return f"BR( {alternatives} ){suffix}"

    def matching_repeats(self):
        """Yield infinite repeats found directly in, or inside sequences of, the alternatives.

        Alternatives whose ``starriness`` is not positive are skipped.
        """
        for branch in self.branches:
            if branch.starriness <= 0:
                continue
            if isinstance(branch, InfiniteRepeat):
                yield branch
            elif isinstance(branch, Sequence):
                yield from branch.matching_repeats()
|
||
|
||
def make_branch(branches: List):
    """Build the simplest element representing an alternation.

    * A single-element list returns that element unchanged.
    * Falsy entries and ``EndOfString`` entries are dropped; if nothing is
      left the result is ``None``.
    * If anything was dropped the result is optional.
    * If every remaining entry is a ``Character`` they are OR-ed into one
      character class (wrapped in ``FiniteRepeat(c, 0, 1)`` when optional).
    * Otherwise a ``Branch`` of the remaining entries is returned.
    """
    if len(branches) == 1:
        return branches[0]

    kept = [
        candidate
        for candidate in branches
        if candidate and not isinstance(candidate, EndOfString)
    ]
    if not kept:
        return None

    # (ab|cd|) -> (ab|cd)?
    optional = len(kept) < len(branches)

    if not all(isinstance(candidate, Character) for candidate in kept):
        return Branch(kept, optional)

    # (a|b) -> [ab], (a|b|) -> [ab]?
    merged = None
    for candidate in kept:
        merged |= candidate
    return FiniteRepeat(merged, 0, 1) if optional else merged
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import sys | ||
import unicodedata | ||
from enum import Enum, auto | ||
from typing import Set | ||
|
||
|
||
class Category(Enum):
    """Character categories (digit / word / space) and their negations.

    Every positive member ``X`` has a counterpart named ``NOT_X``.
    """

    DIGIT = auto()
    NOT_DIGIT = auto()
    WORD = auto()
    NOT_WORD = auto()
    SPACE = auto()
    NOT_SPACE = auto()

    @property
    def is_positive(self) -> bool:
        """True unless the member name starts with ``NOT_``."""
        return not self.name.startswith("NOT_")

    def negate(self) -> "Category":
        """Return the opposite member (``X`` <-> ``NOT_X``)."""
        name = self.name
        flipped = f"NOT_{name}" if self.is_positive else name[4:]
        return Category[flipped]

    def example(self) -> str:
        """Return the sample string registered for this category."""
        return EXAMPLE_FOR_CAT[self]

    def contains(self, literal: int) -> bool:
        """Return whether the code point ``literal`` belongs to this category.

        Membership is decided for the positive category and inverted for
        the ``NOT_`` variants.
        """
        char = chr(literal)
        unicat = unicodedata.category(char)
        positive = self.is_positive
        base = self if positive else self.negate()

        if base is Category.DIGIT:
            hit = unicat == "Nd"
        elif base is Category.WORD:
            # underscore is a word character
            hit = unicat[0] == "L" or unicat == "Nd" or literal == 0x5F
        else:  # Category.SPACE
            hit = unicat == "Zs" or char in (" ", "\n", "\t", "\r", "\f", "\v")
        return hit if positive else not hit
|
||
|
||
# Cache consulted by list_category(): maps a category to its code points.
# Starts out empty.
CATS = {}
|
||
|
||
def list_category(category, full_unicode: bool = False):
    """Yield the code points that belong to ``category``.

    If ``CATS`` holds a non-empty entry for ``category``, that entry is
    yielded and the generator stops. Otherwise every code point in
    ``range(256)`` (or ``range(sys.maxunicode + 1)`` when ``full_unicode``
    is true) is classified with ``unicodedata.category`` and yielded when
    it matches.

    A ``category`` that is not one of the six ``Category`` members yields
    nothing from the scan.
    """
    if (cached := CATS.get(category)):
        yield from cached
        # Stop here: falling through into the scan below would yield every
        # matching code point a second time.
        # NOTE(review): the cache lookup does not take ``full_unicode`` into
        # account - confirm cached entries cover the range callers expect.
        return

    for data in range((sys.maxunicode + 1) if full_unicode else 256):
        c = chr(data)
        unicat = unicodedata.category(c)
        if category is Category.DIGIT:
            if unicat == "Nd":
                yield data
        elif category is Category.NOT_DIGIT:
            if unicat != "Nd":
                yield data
        elif category is Category.WORD:
            # 0x5F is the underscore, which counts as a word character.
            if unicat[0] == "L" or unicat == "Nd" or data == 0x5F:
                yield data
        elif category is Category.NOT_WORD:
            if unicat[0] != "L" and unicat != "Nd" and data != 0x5F:
                yield data
        elif category is Category.SPACE:
            if unicat == "Zs" or c in (" ", "\n", "\t", "\r", "\f", "\v"):
                yield data
        elif category is Category.NOT_SPACE:
            if unicat != "Zs" and c not in (" ", "\n", "\t", "\r", "\f", "\v"):
                yield data
|
||
|
||
def covers_any(categories: Set[Category]) -> bool:
    """Return True if ``categories`` holds some positive category and its negation.

    Such a pair (e.g. ``DIGIT`` together with ``NOT_DIGIT``) matches every
    character between the two of them.
    """
    return any(
        cat.is_positive and cat.negate() in categories for cat in categories
    )
|
||
|
||
# CATS[sre_parse.CATEGORY_DIGIT] = list(list_category(sre_parse.CATEGORY_DIGIT)) | ||
# CATS[sre_parse.CATEGORY_SPACE] = list(list_category(sre_parse.CATEGORY_SPACE)) | ||
# CATS[sre_parse.CATEGORY_WORD] = list(list_category(sre_parse.CATEGORY_WORD)) | ||
# One sample character per category; looked up by Category.example().
EXAMPLE_FOR_CAT = {
    # digits
    Category.DIGIT: "4",
    Category.NOT_DIGIT: "!",
    # word characters
    Category.WORD: "w",
    Category.NOT_WORD: "$",
    # whitespace
    Category.SPACE: " ",
    Category.NOT_SPACE: ".",
}
Oops, something went wrong.