Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make keyword-based grammar creation deterministic #96

Merged
merged 4 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,25 @@ Example:
```python3
>>> from daidepp import create_daide_grammar, daide_visitor
>>> grammar = create_daide_grammar(level=130)
>>> message = 'PRP (AND (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS))) (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS))))'
>>> message = 'PRP (AND (SLO (ENG)) (SLO (GER)) (SLO (RUS)))'
>>> parse_tree = grammar.parse(message)
>>> output = daide_visitor.visit(parse_tree) # object composed of dataclass objects in keywords.py
>>> print(output)
`PRP ( AND ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) ) ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) ) )`
>>> str(output)
'PRP ( AND ( SLO ( ENG ) ) ( SLO ( GER ) ) ( SLO ( RUS ) ) )'
```

The daide_visitor outputs a dataclass that can be used to access useful information about the message.
```python3
>>> grammar = create_daide_grammar(level=130, allow_just_arrangement=True) # allows for messages without PRP
>>> from daidepp import create_daide_grammar, daide_visitor
>>> grammar = create_daide_grammar(level=130, string_type="arrangement") # allows for messages without PRP
>>> parse_tree = grammar.parse('ALY (GER FRA) VSS (TUR ITA)')
>>> output = daide_visitor.visit(parse_tree)
>>> print(output)
`ALY ( GER FRA ) VSS ( TUR ITA )`
>>> print(output.aly_power)
['GER', 'FRA']
>>> print(output.vss_power)
['TUR','ITA']
>>> str(output)
'ALY ( FRA GER ) VSS ( ITA TUR )'
>>> output.aly_powers
('FRA', 'GER')
>>> output.vss_powers
('ITA', 'TUR')
```

If a DAIDE token is not in the grammar or if the message is malformed, the parser raises an exception. We're currently working on returning a list of unrecognized tokens instead of raising an error.
Expand All @@ -51,9 +52,9 @@ In addition, DAIDE strings can be constructed using the classes in [`base_keywor
Example:

```python3
>>> from daidepp import AND, PRP, PCE
>>> from daidepp import AND, PCE
>>> str(AND(PCE("AUS", "ENG"), PCE("AUS", "ENG"), PCE("AUS", "ENG", "FRA")))
`AND ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG FRA ) )`
'AND ( PCE ( AUS ENG ) ) ( PCE ( AUS ENG FRA ) )'
```
Each keyword class uses different parameters for instantiation, so it is recommended to carefully follow the type hints or check out [`tests/keywords`](./tests/keywords/), which provides examples for each class.

Expand All @@ -63,7 +64,7 @@ Grammar can also be created using a subset of press keywords. The list of press
Example:
```python3
>>> from daidepp.grammar.grammar_utils import create_grammar_from_press_keywords
>>> grammar = create_grammar_from_press_keywords(["PRP", "XDO", "ALY_VSS"]
>>> grammar = create_grammar_from_press_keywords(["PRP", "XDO", "ALY_VSS"])
>>> grammar.parse("PRP (ALY (ITA TUR) VSS (ENG RUS))")
>>> grammar.parse("PRP(XDO((ENG FLT EDI) SUP (ENG AMY LVP) MTO CLY))")
>>> grammar.parse("PRP(PCE (AUS ENG))") # this would fail
Expand Down
2 changes: 2 additions & 0 deletions src/daidepp/grammar/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from __future__ import annotations

from typing import Dict, Tuple
from typing_extensions import get_args

from typing_extensions import Literal

DAIDELevel = Literal[
0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160
]
MAX_DAIDE_LEVEL = get_args(DAIDELevel)[-1]

TRAIL_TOKEN = "---" # any value starting with '---' is meant to be a continuation of that key, not a replacement

Expand Down
11 changes: 7 additions & 4 deletions src/daidepp/grammar/grammar_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
LEVELS,
TRAIL_TOKEN,
DAIDELevel,
MAX_DAIDE_LEVEL,
GrammarDict,
)

Expand Down Expand Up @@ -225,8 +226,7 @@ def create_grammar_from_press_keywords(
Parameters
----------
keywords : List[PressKeywords]
List of press keywords. Although the type hint says List[PressKeywords],
this can be a list of string literals or DAIDEObjects (to avoid circular imports).
List of press keywords.
allow_just_arrangement : bool, optional
if set to True, the parser accepts strings that are only arrangements, in
addition to press messages. So, for example, the parser could parse
'ALY (GER FRA) VSS (TUR ITA)' without an enclosing PRP, by default False
Expand All @@ -246,8 +246,11 @@ def create_grammar_from_press_keywords(
if allow_just_arrangement and string_type == "message":
string_type = "arrangement"

# Neither the input order nor duplicate elements should affect the generated grammar
keywords = sorted(set(keywords))

full_grammar = create_daide_grammar(
level=DAIDELevel.__args__[-1],
level=MAX_DAIDE_LEVEL,
string_type=string_type,
)
current_set = set(LEVEL_0.keys())
Expand All @@ -271,7 +274,7 @@ def create_grammar_from_press_keywords(
member.literal.lower() for member in full_grammar["try_tokens"].members
]
keyword_dependencies_special_keywords = defaultdict(list)
for keyword in current_set:
for keyword in sorted(current_set):
# 'message' is only added if press_message or reply is added
for special_keyword in [
"arrangement",
Expand Down
6 changes: 2 additions & 4 deletions src/daidepp/keywords/daide_object.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass

from typing_extensions import get_args

from daidepp.grammar import create_daide_grammar
from daidepp.grammar.grammar import DAIDELevel
from daidepp.grammar.grammar import MAX_DAIDE_LEVEL

_grammar = create_daide_grammar(get_args(DAIDELevel)[-1], string_type="all")
_grammar = create_daide_grammar(MAX_DAIDE_LEVEL, string_type="all")


@dataclass(eq=True, frozen=True)
Expand Down
3 changes: 1 addition & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
from typing_extensions import get_args

from daidepp.grammar import create_daide_grammar
from daidepp.grammar.grammar import DAIDELevel
from daidepp.grammar.grammar import DAIDELevel, MAX_DAIDE_LEVEL
from daidepp.keywords.press_keywords import AnyDAIDEToken
from daidepp.visitor import daide_visitor

# Declared outside of fixture for performance
MAX_DAIDE_LEVEL = get_args(DAIDELevel)[-1]
ALL_GRAMMAR = create_daide_grammar(level=MAX_DAIDE_LEVEL, string_type="all")


Expand Down