Skip to content

Commit

Permalink
read_from_clipboard
Browse files Browse the repository at this point in the history
  • Loading branch information
jolespin committed Nov 28, 2019
1 parent 803b5f4 commit 3fd3318
Show file tree
Hide file tree
Showing 13 changed files with 81 additions and 4 deletions.
Binary file modified .DS_Store
Binary file not shown.
Empty file modified bin/Icon
100755 → 100644
Empty file.
Empty file modified bin/run_soothsayer.py
100755 → 100644
Empty file.
Binary file modified install/.DS_Store
Binary file not shown.
Binary file modified soothsayer/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion soothsayer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import datetime
# Package metadata.
# Coarse version string; the commented-out expression below shows the "%Y.%m" pattern it follows.
__version__= "2019.11"
#datetime.datetime.utcnow().strftime("%Y.%m")
# NOTE(review): `__version_specific__` is assigned twice in a row, so only the second value
# takes effect. This looks like the old and new lines of a diff rendered together — confirm
# and drop the stale line.
__version_specific__ = "2019.11.05" #datetime.datetime.utcnow().strftime("%Y.%m.%d")
__version_specific__ = "2019.11.26" #datetime.datetime.utcnow().strftime("%Y.%m.%d")
__author__ = "Josh L. Espinoza"
# NOTE(review): the address text below looks like an email-obfuscation placeholder rather than
# real addresses — confirm against the repository before relying on it.
__email__ = "[email protected], [email protected]"
__url__ = "https://github.com/jolespin/soothsayer"
Expand Down
67 changes: 65 additions & 2 deletions soothsayer/feature_extraction/feature_extraction.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from .algorithms.clairvoyant import *
from ..io import read_textfile
from ..io import read_textfile, read_dataframe
from ..utils import is_path_like, assert_acceptable_arguments
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import warnings
import pandas as pd

# Names exported by this module, grouped into algorithm names and utility functions.
_algorithms = ["Clairvoyant"]
# NOTE(review): `_utils` is assigned twice in a row, so only the second list takes effect.
# This looks like the old and new lines of a diff rendered together — confirm and drop the
# stale line.
_utils = ["get_trace_from_algorithm_log"]
_utils = ["get_trace_from_algorithm_log", "get_best_model_from_algorithm"]
# `__all__` is the concatenation of both groups, kept in sorted order.
__all__ = _algorithms + _utils
__all__ = sorted(__all__)

Expand All @@ -24,3 +29,61 @@ def get_trace_from_algorithm_log(path:str, algorithm:str="Clairvoyance"):
accuracy = float(line.split("Accuracy=")[1].split("\t")[0])
trace.append(accuracy)
return {"baseline":baseline, "trace":trace}



# Get best model from feature selection
def get_best_model_from_algorithm(synopsis:pd.DataFrame, model_type="infer", prefer="logistic", less_features_is_better=True, copy_synopsis=True, into=pd.Series, algorithm="clairvoyance"):
    """
    Pick the top-ranked model from a feature-selection synopsis table.

    Rows are ranked by "accuracy" (descending), then "sem" (ascending), then
    the "num_<feature_type>_included" column. The first row of the selected
    model type that reaches the maximum accuracy is returned together with an
    unfitted classifier built from that row's hyperparameters.

    Parameters
    ----------
    synopsis : pd.DataFrame or path-like
        Synopsis table, or a path that `read_dataframe` can load. The columns
        read here are "accuracy", "sem", "delta", "model_type",
        "hyperparameters", "random_state", one column whose name ends with
        "_set" (the selected features) and the matching
        "num_<feature_type>_included" column.
    model_type : str
        "logistic", "tree", or "infer" to take the model type of the row(s)
        with the highest accuracy.
    prefer : str
        Model type used when `model_type="infer"` and exactly two model types
        tie for the highest accuracy.
    less_features_is_better : bool
        Passed as the sort direction for the feature-count column: True ranks
        rows with fewer features first among otherwise tied rows.
    copy_synopsis : bool
        If True, the sorted synopsis restricted to the selected model type is
        included in the output under "synopsis".
    into : callable
        Called on the output dictionary to build the return value.
    algorithm : str
        Only "clairvoyance" is accepted.

    Returns
    -------
    `into(dict)` with keys "clf", "hyperparameters", "features", "accuracy",
    "sem", "delta" and, optionally, "synopsis".

    Raises
    ------
    AssertionError
        If `algorithm` is not "clairvoyance".
    IndexError
        If no column ends with "_set" or no row matches the selection.
    KeyError
        If the selected model type is neither "logistic" nor "tree".
    """
    multiple_hits_message = "Multiple instances with best accuracy, lowest sem, and number of features. Choosing first option."
    assert algorithm == "clairvoyance", "Currently, `clairvoyance` is the only supported algorithm"

    if is_path_like(synopsis):
        df_synopsis = read_dataframe(synopsis, evaluate_columns=["hyperparameters"])
        # SECURITY NOTE(review): `eval` executes whatever text is stored in the file.
        # Only load synopsis files from trusted sources; consider `ast.literal_eval`
        # if feature identifiers are always Python literals.
        for field in [column for column in df_synopsis.columns if column.endswith("_set")]:
            # First pass: turn the serialized collection back into a Python object.
            df_synopsis[field] = df_synopsis[field].map(eval)
            # Second pass (best effort): evaluate each identifier so that e.g. tuple
            # or numeric ids regain their type. Plain-text ids raise NameError, ids
            # that are not valid expressions raise SyntaxError, and ids that were
            # already parsed to non-strings raise TypeError; in all of these cases
            # the column is kept as produced by the first pass.
            try:
                df_synopsis[field] = df_synopsis[field].map(lambda ids: list(map(eval, ids)))
            except (NameError, SyntaxError, TypeError):
                pass
    else:
        df_synopsis = synopsis

    # The first column ending in "_set" holds the selected features; its prefix
    # names the matching "num_<prefix>_included" column.
    id_feature_field = [column for column in df_synopsis.columns if column.endswith("_set")][0]
    feature_type = id_feature_field.rsplit("_", 1)[0]

    # Sort the synopsis so the preferred row comes first within every tie
    df_synopsis = df_synopsis.sort_values(
        ["accuracy", "sem", "num_{}_included".format(feature_type)],
        ascending=[False, True, less_features_is_better],
    )

    # Infer best model_type
    if model_type == "infer":
        is_best = df_synopsis["accuracy"] == df_synopsis["accuracy"].max()
        tied_model_types = df_synopsis.loc[is_best, "model_type"]
        if len(tied_model_types) == 1:
            model_type = tied_model_types.iloc[0]
        else:
            model_types_with_best_accuracy = tied_model_types.unique()
            if len(model_types_with_best_accuracy) == 2:
                model_type = prefer
            else:
                warnings.warn(multiple_hits_message)
                model_type = model_types_with_best_accuracy[0]

    # Subset model of interest (boolean mask: no string interpolation into a query)
    df_synopsis = df_synopsis[df_synopsis["model_type"] == model_type]
    is_best = df_synopsis["accuracy"] == df_synopsis["accuracy"].max()
    df_best = df_synopsis[is_best]
    if len(df_best) > 1:
        warnings.warn(multiple_hits_message)

    # Best model: positional access always yields a single row, even when the
    # index contains duplicated labels.
    best_model_info = df_best.iloc[0]
    ModelClass = {
        "logistic": LogisticRegression,
        "tree": DecisionTreeClassifier,
    }[model_type]
    output_info = {
        "clf": ModelClass(
            **best_model_info["hyperparameters"],
            random_state=best_model_info["random_state"],
        ),
        "hyperparameters": best_model_info["hyperparameters"],
        "features": best_model_info[id_feature_field],
        **best_model_info[["accuracy", "sem", "delta"]],
    }
    if copy_synopsis:
        output_info["synopsis"] = df_synopsis
    return into(output_info)
Binary file modified soothsayer/io/.DS_Store
Binary file not shown.
Binary file modified soothsayer/r_wrappers/.DS_Store
Binary file not shown.
Binary file modified soothsayer/utils/.DS_Store
Binary file not shown.
16 changes: 15 additions & 1 deletion soothsayer/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"is_dict", "is_rgb_like", "is_nonstring_iterable","is_dict_like", "is_color", "is_graph", "is_all_same_type", "is_number", "is_query_class","is_symmetrical", "is_in_namespace",
"format_mpl_legend_handles", "LEGEND_KWS", "DIVERGING_KWS", "CMAP_DIVERGING","COLOR_NEGATIVE", "COLOR_POSITIVE", "get_coords_contour", "get_coords_centroid", "get_parameters_ellipse", "add_cbar_from_data", "configure_scatter",
"pd_series_collapse", "is_path_like", "pd_series_filter", "pd_dataframe_matmul", "pd_series_to_groupby_to_dataframe","pd_dataframe_query","pd_dropduplicates_index", "contains","consecutive_replace", "force_symmetry","range_like","generate_random_sequence","fragment","pd_dataframe_extend_index","is_file_like","get_iris_data","assert_acceptable_arguments","filter_compositional","is_function","Command","get_directory_size","DisplayablePath","join_as_strings",
"get_repr",
"get_repr","read_from_clipboard",
]
__all__ = sorted(__all__)

Expand All @@ -39,6 +39,20 @@
# Keyword arguments shared by both `sns.diverging_palette` calls below.
DIVERGING_KWS = dict(h_neg=220, h_pos=15, sep=20, s=90, l=50)
# Diverging palette built from DIVERGING_KWS, requested as a colormap (`as_cmap=True`).
CMAP_DIVERGING = sns.diverging_palette(**DIVERGING_KWS, as_cmap=True)
# Two-color palette (n=2) from the same settings, unpacked as hex strings.
COLOR_NEGATIVE, COLOR_POSITIVE = sns.diverging_palette(**DIVERGING_KWS, n=2).as_hex()
# =========
# Clipboard
# =========
def read_from_clipboard(sep="\n", into=list):
    """
    Return the current clipboard text, optionally split into stripped fields.

    Parameters
    ----------
    sep : str or None
        Separator used to split the clipboard text. With None the raw text
        is returned unchanged.
    into : callable
        Called on the iterable of non-empty, whitespace-stripped fields to
        build the return value (ignored when `sep` is None).
    """
    clipboard_text = pd.io.clipboard.clipboard_get()
    # No separator requested: hand back the text exactly as read.
    if sep is None:
        return clipboard_text
    stripped_fields = map(str.strip, clipboard_text.split(sep))
    # filter(None, ...) drops fields that are empty after stripping.
    return into(filter(None, stripped_fields))

# ===========
# Assertions
Expand Down
Empty file modified standalone/Icon
100755 → 100644
Empty file.
Binary file modified tutorials/.DS_Store
Binary file not shown.

0 comments on commit 3fd3318

Please sign in to comment.