Skip to content

Commit

Permalink
Added Tree Regression Demos
Browse files Browse the repository at this point in the history
  • Loading branch information
kirthevasank committed Mar 5, 2019
1 parent d985e8d commit eac17fa
Show file tree
Hide file tree
Showing 13 changed files with 513 additions and 15 deletions.
7 changes: 0 additions & 7 deletions examples/salsa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,5 @@
Demo on the Shrunk Additive Least Squares Approximations method (SALSA) for high
dimensional regression.
-- [email protected]
If you use this experiment, please cite the following paper.
- Kandasamy K, Yu Y, "Additive Approximations in High Dimensional Nonparametric
Regression via the SALSA", International Conference on Machine Learning, 2016.
- (Dataset): Candanedo L M, Feldheim V, and Deramaix D, "Data Driven
Prediction Models of Energy Use of Appliances in a Low-energy House", Energy and
Buildings, 2017
"""

2 changes: 1 addition & 1 deletion examples/salsa/salsa_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@

def objective(x):
""" Objective: the value of salsa_compute_negative_validation_error at the point x, evaluated with MAX_TR_DATA_SIZE. """
# NOTE(review): this listing is a diff view (1 addition & 1 deletion), so the first
# return below is presumably the removed line and the second one its replacement.
return salsa_compute_negative_validation_error(x, MAX_TR_DATA_SIZE)
return salsa_compute_negative_validation_error([MAX_TR_DATA_SIZE], x)

15 changes: 10 additions & 5 deletions examples/tree_reg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@ gradient boosted regression and random forest classification.


To run this demo, you will need to download the `news_popularity.p` and
`naval_propulsion.p` datasets
into this directory. The datasets are available
`naval_propulsion.p` datasets into this directory. The datasets are available
[here](http://www.cs.cmu.edu/~kkandasa/dragonfly_datasets.html).
Then, run the following commands from this directory.
To run this demo, you will need to install
[scikit-learn](https://scikit-learn.org/stable/).


Look at [`in_code_demo.py`](in_code_demo.py) for a demo on how to use this in your code.
Alternatively, run the following commands from this directory for gradient boosted regression
on the naval propulsion dataset.
```bash
$ dragonfly-script.py --config config_salsa_energy.json --options ../options_files/options_example_realtime.txt
$ dragonfly-script.py --config config_salsa_energy_mf.json --options ../options_files/options_example_realtime.txt # For multi-fidelity version
$ dragonfly-script.py --config config_naval_gbr.json --options ../options_files/options_example_realtime.txt
$ dragonfly-script.py --config config_naval_gbr_mf.json --options ../options_files/options_example_realtime.txt # For multi-fidelity version
```

 
Expand Down
4 changes: 2 additions & 2 deletions examples/tree_reg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
A demo for fitting hyper-parameters in Tree based ensemble regression methods such as
gradient boosted regression and random forest classification.
A demo for fitting hyper-parameters in Tree based ensemble methods such as gradient
boosted regression/classification or random forest regression/classification.
-- [email protected]
"""

63 changes: 63 additions & 0 deletions examples/tree_reg/config_naval_gbr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"name": "naval_gbr",

"domain" : {

"loss" : {
"name":"loss",
"type":"discrete",
"items":"ls-lad-huber"
},

"log10_learning_rate" : {
"name":"log10_learning_rate",
"type":"float",
"min":-3,
"max":1
},

"n_estimators" : {
"name":"n_estimators",
"type":"int",
"min":1,
"max":1000
},

"subsample" : {
"name":"subsample",
"type":"float",
"min":0.1,
"max":1.0
},

"criterion" : {
"name":"criterion",
"type":"discrete",
"items":"friedman_mse-mse-mae"
},

"min_samples_split_frac" : {
"name":"min_samples_split_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"min_samples_leaf_frac" : {
"name":"min_samples_leaf_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"max_depth" : {
"name":"max_depth",
"type":"int",
"min":3,
"max":20
}

}

}

74 changes: 74 additions & 0 deletions examples/tree_reg/config_naval_gbr_mf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"name": "naval_gbr_mf",

"domain" : {

"loss" : {
"name":"loss",
"type":"discrete",
"items":"ls-lad-huber"
},

"log10_learning_rate" : {
"name":"log10_learning_rate",
"type":"float",
"min":-3,
"max":1
},

"n_estimators" : {
"name":"n_estimators",
"type":"int",
"min":1,
"max":1000
},

"subsample" : {
"name":"subsample",
"type":"float",
"min":0.1,
"max":1.0
},

"criterion" : {
"name":"criterion",
"type":"discrete",
"items":"friedman_mse-mse-mae"
},

"min_samples_split_frac" : {
"name":"min_samples_split_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"min_samples_leaf_frac" : {
"name":"min_samples_leaf_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"max_depth" : {
"name":"max_depth",
"type":"int",
"min":3,
"max":20
}

},

"fidel_space": {
"log_num_tr_data_to_use": {
"name":"log_num_tr_data_to_use",
"type":"float",
"min":8.0063675676502459,
"max":9.1049798563183568
}
},

"fidel_to_opt":[9.1049798563183568]

}

50 changes: 50 additions & 0 deletions examples/tree_reg/config_news_rfr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"name": "news_rfr",

"domain" : {

"n_estimators" : {
"name":"n_estimators",
"type":"int",
"min":1,
"max":1000
},

"criterion" : {
"name":"criterion",
"type":"discrete",
"items":"mse-mae"
},

"max_depth" : {
"name":"max_depth",
"type":"int",
"min":3,
"max":20
},

"min_samples_split_frac" : {
"name":"min_samples_split_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"min_samples_leaf_frac" : {
"name":"min_samples_leaf_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"max_features_frac" : {
"name":"max_features_frac",
"type":"float",
"min":0.001,
"max":1.0
}

}

}

61 changes: 61 additions & 0 deletions examples/tree_reg/config_news_rfr_mf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"name": "news_rfr_mf",

"domain" : {

"n_estimators" : {
"name":"n_estimators",
"type":"int",
"min":1,
"max":1000
},

"criterion" : {
"name":"criterion",
"type":"discrete",
"items":"mse-mae"
},

"max_depth" : {
"name":"max_depth",
"type":"int",
"min":3,
"max":20
},

"min_samples_split_frac" : {
"name":"min_samples_split_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"min_samples_leaf_frac" : {
"name":"min_samples_leaf_frac",
"type":"float",
"min":0.001,
"max":0.5
},

"max_features_frac" : {
"name":"max_features_frac",
"type":"float",
"min":0.001,
"max":1.0
}

},

"fidel_space": {
"log_num_tr_data_to_use": {
"name":"log_num_tr_data_to_use",
"type":"float",
"min":8.5171931914162382,
"max":9.9034875525361272
}
},

"fidel_to_opt":[9.9034875525361272]

}

15 changes: 15 additions & 0 deletions examples/tree_reg/naval_gbr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Tuning the hyperparameters of Gradient boosted regression on the Naval propulsion
dataset.
-- [email protected]
"""

# pylint: disable=invalid-name

from naval_gbr_mf import MAX_TR_DATA_SIZE
from naval_gbr_mf import objective as objective_mf

def objective(x):
    """ Single-fidelity objective: calls the multi-fidelity objective imported from
        naval_gbr_mf with the fidelity vector fixed to [MAX_TR_DATA_SIZE].
        x is the hyper-parameter point and is forwarded unchanged. """
    # NOTE(review): config_naval_gbr_mf.json names the fidelity log_num_tr_data_to_use
    # (range up to ~9.105), whereas a raw count is passed here -- confirm that
    # get_tr_dataset_size_from_z0 handles this value as intended.
    fixed_fidel = [MAX_TR_DATA_SIZE]
    return objective_mf(fixed_fidel, x)

42 changes: 42 additions & 0 deletions examples/tree_reg/naval_gbr_mf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Tuning the hyperparameters of Gradient boosted regression on the Naval propulsion
dataset (multi-fidelity version).
-- [email protected]
"""

# pylint: disable=invalid-name
# pylint: disable=unexpected-keyword-arg

import pickle
# Local
from skltree import gbr_train_and_validate, get_tr_dataset_size_from_z0

# Load the dataset that is expected to sit next to this file. On failure a message is
# printed and DATA is left undefined, so objective() will fail until the file is present.
try:
    import os
    import sys
    file_name = 'naval_propulsion.p'
    curr_dir_path = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(curr_dir_path, file_name)
    # NOTE(review): pickle.load can execute arbitrary code; only use a dataset file
    # obtained from a trusted source.
    # The context manager closes the file handle even if unpickling raises.
    with open(data_path, 'rb') as data_file:
        if sys.version_info[0] < 3:
            DATA = pickle.load(data_file)
        else:
            # The encoding argument only exists in Python 3's pickle.load.
            DATA = pickle.load(data_file, encoding='latin1')
except IOError:
    print(('Could not load file %s. Make sure the file %s is in the same directory as ' +
           'this file or pass the dataset to the function.')%(file_name, data_path))

# Size limits passed to gbr_train_and_validate; MAX_TR_DATA_SIZE also normalises cost().
# Note that exp(9.1049798563183568), the fidel_to_opt value in
# config_naval_gbr_mf.json, equals 9000.
MAX_TR_DATA_SIZE = 9000
# NOTE(review): presumably the number of validation points -- confirm in skltree.
MAX_VA_DATA_SIZE = 2000


def objective(z, x):
    """ Multi-fidelity objective.
        z: fidelity vector; z[0] is mapped to a training set size by
           get_tr_dataset_size_from_z0.
        x: hyper-parameter point, forwarded to gbr_train_and_validate.
        Returns the result of gbr_train_and_validate on DATA. """
    tr_size = get_tr_dataset_size_from_z0(z[0])
    return gbr_train_and_validate(
        x, DATA, tr_size, MAX_TR_DATA_SIZE, MAX_VA_DATA_SIZE)

def cost(z):
    """ Cost of evaluating at fidelity z: the training set size obtained from z[0]
        (via get_tr_dataset_size_from_z0) as a fraction of MAX_TR_DATA_SIZE. """
    # float() keeps this a true division under Python 2 as well.
    return get_tr_dataset_size_from_z0(z[0]) / float(MAX_TR_DATA_SIZE)

14 changes: 14 additions & 0 deletions examples/tree_reg/news_rfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Tuning the hyperparameters of Random forest regression on the News Popularity dataset
-- [email protected]
"""

# pylint: disable=invalid-name

from news_rfr_mf import MAX_TR_DATA_SIZE
from news_rfr_mf import objective as objective_mf

def objective(x):
    """ Single-fidelity objective: calls the multi-fidelity objective imported from
        news_rfr_mf with the fidelity vector fixed to [MAX_TR_DATA_SIZE].
        x is the hyper-parameter point and is forwarded unchanged. """
    # NOTE(review): config_news_rfr_mf.json names the fidelity log_num_tr_data_to_use
    # (range up to ~9.903), whereas a raw count is passed here -- confirm that the
    # multi-fidelity objective handles this value as intended.
    fixed_fidel = [MAX_TR_DATA_SIZE]
    return objective_mf(fixed_fidel, x)

Loading

0 comments on commit eac17fa

Please sign in to comment.