Skip to content

Commit

Permalink
add: 1. moore_voting 2. xgb_boost_trees
Browse files Browse the repository at this point in the history
fix: conditions parse fail
  • Loading branch information
blizhan committed Jan 6, 2023
1 parent 05c8be7 commit 9a19e41
Show file tree
Hide file tree
Showing 9 changed files with 497 additions and 132 deletions.
59 changes: 54 additions & 5 deletions example.ipynb

Large diffs are not rendered by default.

344 changes: 220 additions & 124 deletions pycdoexpr/__init__.py

Large diffs are not rendered by default.

137 changes: 137 additions & 0 deletions pycdoexpr/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from binarytree import Node


def construct_tree(kw: list, cond: list) -> Node:
    """Construct the condition binary tree from a keyword list and a condition list.

    ``kw`` is scanned for the first ``"if"`` together with the ``"else"`` that
    closes it.  The condition at that ``"if"`` position becomes the root, the
    keywords between the pair build the left subtree and the keywords after
    the ``"else"`` build the right subtree.  Branches without further nesting
    end in the placeholder leaves ``Node(0)`` (left) and ``Node(1)`` (right).

    Args:
        kw (list): keyword list; only ``"if"`` and ``"else"`` entries are
            interpreted.
        cond (list): condition list; each entry must expose a ``value``
            attribute.

    Returns:
        Node: binary tree root node

    Raises:
        ValueError: if an ``"else"`` has no preceding ``"if"`` or ``kw``
            contains no complete ``"if"``/``"else"`` pair (this includes an
            empty ``kw``).
    """
    # Base case: a single if/else pair guarding exactly one condition.
    if len(kw) == 2 and len(cond) == 1 and kw[0] == "if" and kw[1] == "else":
        left = Node(value=0)
        right = Node(value=1)
        return Node(value=cond[0].value, left=left, right=right)

    # Find the outermost if/else pair: the pair is complete when popping the
    # matching "if" leaves the stack empty.
    if_index = else_index = None
    stack = []
    for n, k in enumerate(kw):
        if k == "if":
            stack.append(n)
        elif k == "else":
            if not stack:
                raise ValueError(f"'else' at position {n} has no matching 'if'")
            idx = stack.pop()
            if not stack:
                if_index, else_index = idx, n
                break

    if if_index is None:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError("keyword list contains no complete 'if'/'else' pair")

    root = Node(value=cond[if_index].value)

    if if_index + 1 == else_index:
        # Nothing nested between "if" and "else": the left branch is a leaf.
        left = Node(0)
        right = construct_tree(kw[else_index + 1 :], cond[if_index + 1 :])
    else:
        # else_index is always greater than if_index, so this is the nested case.
        # The number of nested "if"s tells how many conditions belong to the
        # left subtree; the remaining ones go to the right subtree.
        cond_num = kw[if_index + 1 : else_index - 1].count("if")
        left = construct_tree(kw[if_index + 1 : else_index], cond[1 : cond_num + 1])
        if len(kw[else_index + 1 :]) <= 1:
            right = Node(1)
        else:
            right = construct_tree(kw[else_index + 1 :], cond[cond_num + 1 :])

    root.left = left
    root.right = right
    return root

def construct_tree_with_tree_nodes(nodes: dict) -> Node:
    """Build the condition binary tree described by a decision-tree node dict.

    Construction starts at the entry stored under key ``0`` and follows each
    entry's ``child_number`` pair recursively.

    Args:
        nodes (dict): decision tree node dictionary; entries expose ``value``
            and ``child_number``.

    Returns:
        Node: binary tree node
    """

    def _build(number: int) -> Node:
        entry = nodes[number]
        tree_node = Node(entry.value)
        # An entry without children is a leaf and is returned as-is.
        if len(entry.child_number) == 0:
            return tree_node
        left_child = _build(entry.child_number[0])
        right_child = _build(entry.child_number[1])
        tree_node.left = left_child
        tree_node.right = right_child
        return tree_node

    return _build(0)

def get_max_min_leaf_depth(root: Node) -> tuple:
    """Return the maximum and minimum leaf depth of the tree below ``root``.

    The tree is walked level by level; ``root`` itself is at depth 0.

    Args:
        root (Node): root node of the (sub)tree; every node must expose
            ``left`` and ``right`` attributes (``None`` when absent).

    Returns:
        tuple: max_leaf_depth, min_leaf_depth
    """
    min_leaf_depth = 0
    max_leaf_depth = -1
    current_nodes = [root]

    while current_nodes:
        # Each pass of the loop handles one complete tree level.
        max_leaf_depth += 1
        next_nodes = []
        for node in current_nodes:
            is_leaf = node.left is None and node.right is None
            # 0 doubles as "no leaf seen yet": a leaf at depth 0 can only be
            # the root itself, in which case there is no deeper level.
            if is_leaf and min_leaf_depth == 0:
                min_leaf_depth = max_leaf_depth

            if node.left is not None:
                next_nodes.append(node.left)
            if node.right is not None:
                next_nodes.append(node.right)
        current_nodes = next_nodes
    return max_leaf_depth, min_leaf_depth

def construct_expr(node: Node) -> str:
    """Construct a cdo conditional expression from a binary tree root node.

    Every internal node is rendered as ``((condition))? (true): (false)``
    where ``condition`` is the node's value, the left child supplies the true
    branch and the right child the false branch.  A leaf child contributes
    the text after the last ``=`` of its value (the whole value when it
    contains no ``=``); an internal child is rendered recursively.

    Args:
        node (Node): root node; it must be an internal node with both
            children present, and leaf values must be strings.

    Returns:
        str: expr str
    """
    patt = "(({condition}))? ({true_value}): ({false_value})"

    def _branch(child):
        # Checking the child directly avoids re-traversing the whole subtree
        # at every recursion level just to find out whether it is a leaf.
        if child.left is None and child.right is None:
            return child.value.split("=")[-1]
        return construct_expr(child)

    return patt.format(
        condition=node.value,
        true_value=_branch(node.left),
        false_value=_branch(node.right),
    )
22 changes: 19 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from pycdoexpr import cdoexpr
wind_level_bins = [ 0.3, 1.6, 3.4, 5.5, 8. , 10.8, 13.9, 17.2,\
20.8, 24.5, 28.5, 32.6, 36.9, 41.4, 46.1, 50.9, 56. , 61.3]
cexpr = cdoexpr()
cexpr.digitize(varname='WIND_LEVEL', right=False)
cexpr.digitize(varname='WIND_LEVEL', bins=wind_level_bins, right=False)
```

2. convert multi-level conditions string in python syntax to cdo expr
Expand Down Expand Up @@ -78,8 +78,24 @@ f"cdo expr,'WW={expr}' infile outfile"
```
![](static/conditions.jpg)

3. convert single xgboost / sklearn tree to expr
- [ ] TODO
3. moore voting
```python

# EX3: generate moore voting cdo expression
expr = cexpr.moore_voting(voters=['a' ,'b', 'c'], varname='MAJOR')
f"cdo -expr,'{expr}' infile outfile"
```
![](static/moore_voting.jpg)

4. convert multiple xgboost trees to a cdo expr with an ensemble method (averaging, boosting, moore_voting) *experimental*
```python

# EX4: convert an xgboost decision-tree model to a cdo expression

expr = cexpr.xgb_decision_trees('./static/model.pkl',ensemble='averaging')
f"cdo -expr,'{expr}' infile outfile"
```
![](static/xgb_decision_trees.jpg)

## Benchmark

Expand Down
Binary file added static/model.pkl
Binary file not shown.
Binary file added static/moore_voting.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added static/xgb_decision_trees.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added test/__init__.py
Empty file.
67 changes: 67 additions & 0 deletions test/test_pycdoexpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from pycdoexpr import cdoexpr

def test_digitize():
    """Smoke test: build a digitize expression for the wind-level bins."""
    builder = cdoexpr()
    bin_edges = [ 0.3, 1.6, 3.4, 5.5, 8. , 10.8, 13.9, 17.2,
                 20.8, 24.5, 28.5, 32.6, 36.9, 41.4, 46.1, 50.9, 56. , 61.3]
    # One more level than there are bin edges.
    levels = range(len(bin_edges) + 1)
    expr = builder.digitize('WIND', bin_edges, levels, right=False)

def test_conditions():
    """Smoke test: pass a nested if/elif/else rule set to ``cdoexpr.conditions``.

    No assertion is made on the result; the test only exercises the call.
    """
    cexpr = cdoexpr()
    # Python-syntax rule set assigning WW from PRE1H, TEM2, VIS, TCC, RHU2 and WS10.
    # NOTE(review): the literal's line indentation looks lost in this view —
    # confirm against the repository before relying on it.
    s = '''
if PRE1H > 0.001:
if TEM2 >= 3:
if PRE1H < 0.1:
WW = 51
elif PRE1H < 2.5:
WW = 61
elif PRE1H < 8:
WW = 62
else:
WW = 63
elif TEM2 >=0:
if PRE1H < 2.5:
WW = 66
else:
WW = 67
else:
if PRE1H < 0.1:
WW = 71
elif PRE1H < 0.2:
WW = 73
else:
WW = 75
else:
if VIS > 10000:
if TCC > 80:
WW = 3
elif TCC > 40:
WW = 2
else:
WW = 0
elif VIS >= 1000:
if RHU2 > 80:
WW = 45
elif RHU2 > 50:
WW = 48
else:
WW = 31
else:
if WS10 < 1:
WW = 45
else:
if RHU2 >=50:
WW = 45
else:
WW = 34
'''
    expr = cexpr.conditions(s, verbose=True)

def test_moore_voting():
    """Smoke test: build the moore-voting expression for three voters."""
    voters = ['a', 'b', 'c']
    builder = cdoexpr()
    expr = builder.moore_voting(voters, 'MAJOR')

def test_xgb_decision_trees():
    """Smoke test: convert the bundled model with the averaging ensemble."""
    model_path = './static/model.pkl'
    builder = cdoexpr()
    expr = builder.xgb_decision_trees(model_path, ensemble='averaging')

0 comments on commit 9a19e41

Please sign in to comment.