Skip to content

Commit

Permalink
Merge pull request #96 from aphedges/deterministic-keyword-grammar
Browse files Browse the repository at this point in the history
Make keyword-based grammar creation deterministic
  • Loading branch information
mjspeck authored Jun 5, 2023
2 parents 8804169 + 4eccbf1 commit e23b612
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 23 deletions.
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,25 @@ Example:
```python3
>>> from daidepp import create_daide_grammar, daide_visitor
>>> grammar = create_daide_grammar(level=130)
>>> message = 'PRP (AND (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS))) (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS))))'
>>> message = 'PRP (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS)))'
>>> parse_tree = grammar.parse(message)
>>> output = daide_visitor.visit(parse_tree) # object composed of dataclass objects in keywords.py
>>> print(output)
`PRP ( AND ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) ) ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) ) )`
>>> str(output)
'PRP ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) )'
```

The `daide_visitor` returns a dataclass object that can be used to access useful information about the message.
```python3
>>> grammar = create_daide_grammar(level=130, allow_just_arrangement=True) # allows for messages without PRP
>>> from daidepp import create_daide_grammar, daide_visitor
>>> grammar = create_daide_grammar(level=130, string_type="arrangement") # allows for messages without PRP
>>> parse_tree = grammar.parse('ALY (GER FRA) VSS (TUR ITA)')
>>> output = daide_visitor.visit(parse_tree)
>>> print(output)
`ALY ( GER FRA ) VSS ( TUR ITA )`
>>> print(output.aly_power)
['GER', 'FRA']
>>> print(output.vss_power)
['TUR','ITA']
>>> str(output)
'ALY ( FRA GER ) VSS ( ITA TUR )'
>>> output.aly_powers
('FRA', 'GER')
>>> output.vss_powers
('ITA', 'TUR')
```

If a DAIDE token is not in the grammar or if the message is malformed, the parser will raise an exception. We're currently working on returning a list of unrecognized tokens instead of raising an error.
Expand All @@ -51,9 +52,9 @@ In addition, DAIDE strings can be constructed using the classes in [`base_keywor
Example:

```python3
>>> from daidepp import AND, PRP, PCE
>>> from daidepp import AND, PCE
>>> str(AND(PCE("AUS", "ENG"), PCE("AUS", "ENG"), PCE("AUS", "ENG", "FRA")))
`AND ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG FRA ) )`
'AND ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG FRA ) )'
```
Each keyword class takes different parameters for instantiation, so it is recommended to carefully follow the type hints or check out [`tests/keywords`](./tests/keywords/), which provides examples for each class.

Expand All @@ -63,7 +64,7 @@ Grammar can also be created using a subset of press keywords. The list of press
Example:
```python3
>>> from daidepp.grammar.grammar_utils import create_grammar_from_press_keywords
>>> grammar = create_grammar_from_press_keywords(["PRP", "XDO", "ALY_VSS"]
>>> grammar = create_grammar_from_press_keywords(["PRP", "XDO", "ALY_VSS"])
>>> grammar.parse("PRP (ALY (ITA TUR) VSS (ENG RUS))")
>>> grammar.parse("PRP(XDO((ENG FLT EDI) SUP (ENG AMY LVP) MTO CLY))")
>>> grammar.parse("PRP(PCE (AUS ENG))") # this would fail
Expand Down
2 changes: 2 additions & 0 deletions src/daidepp/grammar/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from __future__ import annotations

from typing import Dict, Tuple
from typing_extensions import get_args

from typing_extensions import Literal

DAIDELevel = Literal[
0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160
]
MAX_DAIDE_LEVEL = get_args(DAIDELevel)[-1]

TRAIL_TOKEN = "---" # any value starting with '---' is meant to be a continuation of that key, not a replacement

Expand Down
11 changes: 7 additions & 4 deletions src/daidepp/grammar/grammar_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
LEVELS,
TRAIL_TOKEN,
DAIDELevel,
MAX_DAIDE_LEVEL,
GrammarDict,
)

Expand Down Expand Up @@ -225,8 +226,7 @@ def create_grammar_from_press_keywords(
Parameters
----------
keywords : List[PressKeywords]
List of press keywords. Although the type hint says List[PressKeywords],
this can be a list of string literals or DAIDEObjects (to avoid circular imports).
List of press keywords.
allow_just_arrangement : bool, optional
if set to True, the parser accepts strings that are only arrangements, in
addition to press messages. So, for example, the parser could parse, by default False
Expand All @@ -246,8 +246,11 @@ def create_grammar_from_press_keywords(
if allow_just_arrangement and string_type == "message":
string_type = "arrangement"

# The input order or any duplicate elements should not affect the generated grammar
keywords = sorted(set(keywords))

full_grammar = create_daide_grammar(
level=DAIDELevel.__args__[-1],
level=MAX_DAIDE_LEVEL,
string_type=string_type,
)
current_set = set(LEVEL_0.keys())
Expand All @@ -271,7 +274,7 @@ def create_grammar_from_press_keywords(
member.literal.lower() for member in full_grammar["try_tokens"].members
]
keyword_dependencies_special_keywords = defaultdict(list)
for keyword in current_set:
for keyword in sorted(current_set):
# 'message' is only added if press_message or reply is added
for special_keyword in [
"arrangement",
Expand Down
6 changes: 2 additions & 4 deletions src/daidepp/keywords/daide_object.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass

from typing_extensions import get_args

from daidepp.grammar import create_daide_grammar
from daidepp.grammar.grammar import DAIDELevel
from daidepp.grammar.grammar import MAX_DAIDE_LEVEL

_grammar = create_daide_grammar(get_args(DAIDELevel)[-1], string_type="all")
_grammar = create_daide_grammar(MAX_DAIDE_LEVEL, string_type="all")


@dataclass(eq=True, frozen=True)
Expand Down
3 changes: 1 addition & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
from typing_extensions import get_args

from daidepp.grammar import create_daide_grammar
from daidepp.grammar.grammar import DAIDELevel
from daidepp.grammar.grammar import DAIDELevel, MAX_DAIDE_LEVEL
from daidepp.keywords.press_keywords import AnyDAIDEToken
from daidepp.visitor import daide_visitor

# Declared outside of fixture for performance
MAX_DAIDE_LEVEL = get_args(DAIDELevel)[-1]
ALL_GRAMMAR = create_daide_grammar(level=MAX_DAIDE_LEVEL, string_type="all")


Expand Down

0 comments on commit e23b612

Please sign in to comment.