
Commit

Update for October
jolespin committed Nov 5, 2019
1 parent deb61d1 commit 0182e83
Showing 12 changed files with 137 additions and 27 deletions.
Binary file modified .DS_Store
Empty file modified bin/run_soothsayer.py
100644 → 100755
Binary file modified install/.DS_Store
Binary file modified soothsayer/.DS_Store
2 changes: 1 addition & 1 deletion soothsayer/__init__.py
@@ -36,7 +36,7 @@
import datetime
__version__= "2019.10"
#datetime.datetime.utcnow().strftime("%Y.%m")
__version_specific__ = "2019.10.17" #datetime.datetime.utcnow().strftime("%Y.%m.%d")
__version_specific__ = "2019.10.31" #datetime.datetime.utcnow().strftime("%Y.%m.%d")
__author__ = "Josh L. Espinoza"
__email__ = "[email protected], [email protected]"
__url__ = "https://github.com/jolespin/soothsayer"
79 changes: 55 additions & 24 deletions soothsayer/core/core.py
@@ -1,13 +1,18 @@
import os, sys, datetime, copy
from collections import OrderedDict
import pandas as pd
from ..utils import is_path_like, is_nonstring_iterable, pd_dataframe_extend_index
from skbio.util._decorator import experimental, stable

from ..utils import is_path_like, is_nonstring_iterable, pd_dataframe_extend_index, assert_acceptable_arguments
from ..io import read_dataframe, write_object

__all__ = ["Dataset"]
__all__ = sorted(__all__)



# Dataset
@experimental(as_of="2019.06")
class Dataset(object):
def __init__(self, data:pd.DataFrame, metadata_observations:pd.DataFrame=None, metadata_attributes:pd.DataFrame=None, metadata_target_field=None, name=None, description=None, obsv_type=None, attr_type=None, metric_type=None, name_initial_data=None, check_index_overlap=True, alias_metadata_observations:str="m0", alias_metadata_attributes:str="m1", **additional_fields):
"""
@@ -75,16 +80,14 @@ def __init__(self, data:pd.DataFrame, metadata_observations:pd.DataFrame=None, m
if metadata_observations is None:
metadata_observations = pd_dataframe_extend_index(data.index, pd.DataFrame(), axis=0)
self.add_metadata(metadata_observations, axis="observations", metadata_target_field=metadata_target_field)
if self.alias_metadata_observations is not None:
setattr(self, str(self.alias_metadata_observations), self.metadata_observations)



# Metadata attributes
if metadata_attributes is None:
metadata_attributes = pd_dataframe_extend_index(data.columns, pd.DataFrame(), axis=0)
self.add_metadata(metadata_attributes, axis="attributes", metadata_target_field=None)
if self.alias_metadata_attributes is not None:
setattr(self, str(self.alias_metadata_attributes), self.metadata_attributes)


def __repr__(self):
class_name = str(self.__class__).split(".")[-1][:-2]
@@ -153,6 +156,9 @@ def add_metadata(self, metadata:pd.DataFrame, axis="infer", metadata_target_fiel
self.y_field = metadata_target_field
self.y = self.metadata_observations[self.y_field]

if self.alias_metadata_observations is not None:
setattr(self, str(self.alias_metadata_observations), self.metadata_observations)

# Metadata attributes
if axis in {"attrs", "attributes","columns", 1}:
if self.check_index_overlap:
@@ -164,6 +170,8 @@ def add_metadata(self, metadata:pd.DataFrame, axis="infer", metadata_target_fiel
self.metadata_attributes = self.metadata_attributes.to_frame()
if self.check_index_overlap:
self.metadata_attributes = self.metadata_attributes.loc[initial_data_attributes]
if self.alias_metadata_attributes is not None:
setattr(self, str(self.alias_metadata_attributes), self.metadata_attributes)
return self

# Add data versions
@@ -317,25 +325,27 @@ def set_default(self, name_version, observation_subset=None, attribute_subset=No
self.columns_version = attribute_subset
return self

# Filter dataset
def filter(self, func_observations=None, func_attributes=None, name_version=None):
"""
Filter a dataset
"""
# If no version is specified then use the default
if name_version is None:
name_version = self.X_version
assert name_version in self.__database__, f"Cannot find `{name_version}`. Please add it to the datasets via `add_version`"
df = self.__database__[name_version]["data"]
# Observations
idx_observations = df.index
if func_observations is not None:
idx_observations = [*filter(func_observations, idx_observations)]
# Attributes
idx_attributes = df.columns
if func_attributes is not None:
idx_attributes = [*filter(func_attributes, idx_attributes)]
return df.loc[idx_observations, idx_attributes]
# # Filter dataset
# def filter(self, func_observations=None, func_attributes=None, name_version=None):
# """
# Filter a dataset

# #! Revisit this
# """
# # If no version is specified then use the default
# if name_version is None:
# name_version = self.X_version
# assert name_version in self.__database__, f"Cannot find `{name_version}`. Please add it to the datasets via `add_version`"
# df = self.__database__[name_version]["data"]
# # Observations
# idx_observations = df.index
# if func_observations is not None:
# idx_observations = [*filter(func_observations, idx_observations)]
# # Attributes
# idx_attributes = df.columns
# if func_attributes is not None:
# idx_attributes = [*filter(func_attributes, idx_attributes)]
# return df.loc[idx_observations, idx_attributes]

# Write object to file
def to_file(self, path:str, compression="infer"):
@@ -385,5 +395,26 @@ def __iter__(self):
for name_version, d in self.__database__.items():
yield name_version, d["data"]

def __call__(self, field, index=None, func_filter=None, func_map=None, axis=0):
assert_acceptable_arguments(axis, {0,1})
assert not is_nonstring_iterable(field), "`field` cannot be a non-string iterable"
if axis == 0:
assert self.metadata_observations is not None
assert field in self.metadata_observations.columns, "`{}` not in `metadata_observations`".format(field)
data = self.metadata_observations[field]
if axis == 1:
assert self.metadata_attributes is not None
assert field in self.metadata_attributes.columns, "`{}` not in `metadata_attributes`".format(field)
data = self.metadata_attributes[field]
if index is not None:
data = data[index]
if func_filter is not None:
data = data[func_filter]
if func_map is not None:
data = data.map(func_map)

return data


def copy(self):
return copy.deepcopy(self)
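
For context, a minimal usage sketch of the new Dataset.__call__ metadata accessor introduced in this commit: axis=0 reads a field from metadata_observations, axis=1 from metadata_attributes, with optional callable filtering and mapping. The toy data, the "group" field, and the import path below are illustrative assumptions, not part of the commit.

import pandas as pd
from soothsayer.core.core import Dataset

# Toy (observations x attributes) table plus observation metadata (hypothetical example data)
X = pd.DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
                 index=["s1", "s2", "s3"], columns=["attr_a", "attr_b"])
m0 = pd.DataFrame({"group": ["control", "case", "case"]}, index=["s1", "s2", "s3"])

ds = Dataset(X, metadata_observations=m0, name="toy")
print(ds.m0.head())  # m0 alias for metadata_observations, now assigned inside add_metadata

groups = ds("group", axis=0)                                    # full metadata Series
cases = ds("group", axis=0, func_filter=lambda y: y == "case")  # callable boolean filter
upper = ds("group", axis=0, func_map=str.upper)                 # element-wise mapping
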
Binary file modified soothsayer/io/.DS_Store
65 changes: 64 additions & 1 deletion soothsayer/networks/networks.py
@@ -28,7 +28,7 @@



__all__ = ["Hive", "intramodular_connectivity", "topological_overlap_measure", "signed", "determine_soft_threshold", "TemporalNetwork", "Edge"]
__all__ = ["Hive", "intramodular_connectivity", "topological_overlap_measure", "signed", "determine_soft_threshold","cluster_modularity", "TemporalNetwork", "Edge"]
__all__ = sorted(__all__)

# Network Edge
@@ -1030,6 +1030,69 @@ def determine_soft_threshold(similarity:pd.DataFrame, title=None, show_plot=True
fig.suptitle(title, fontsize=18, fontweight="bold", y=pad)
return fig, ax, df_sft

# Cluster modularity matrix
def cluster_modularity(df:pd.DataFrame, node_type="node", iteration_type="iteration"):
"""
n_louvain = 100
louvain = dict()
for rs in tqdm(range(n_louvain), "Louvain"):
louvain[rs] = community.best_partition(graph_unsigned, random_state=rs)
df = pd.DataFrame(louvain)
# df.head()
# 0 1 2 3 4 5 6 7 8 9
# a 0 0 0 0 0 0 0 0 0 0
# b 1 1 1 1 1 1 1 1 1 1
# c 2 2 2 2 2 2 2 2 2 2
# d 3 3 3 3 3 3 3 3 3 3
# e 4 1 1 4 1 4 4 1 4 1
cluster_modularity(df).head()
iteration 0 1 2 3 4 5 6 7 8 9
node
(b, a) 0 0 0 0 0 0 0 0 0 0
(c, a) 0 0 0 0 0 0 0 0 0 0
(d, a) 0 0 0 0 0 0 0 0 0 0
(e, a) 0 0 0 0 0 0 0 0 0 0
(a, f) 0 0 0 0 0 0 0 0 0 0
"""

# Adapted from @code-different:
# https://stackoverflow.com/questions/58566957/how-to-transform-a-dataframe-of-cluster-class-group-labels-into-a-pairwise-dataf


# `x` is a table of (n=nodes, p=iterations)
nodes = df.index
iterations = df.columns
x = df.values
n,p = x.shape

# `y` is an array of n tables, each having 1 row and p columns
y = x[:, None]

# Using numpy broadcasting, `z` contains the result of comparing each
# table in `y` against `x`. So the shape of `z` is (n x n x p)
z = x == y

# Reshaping `z` by merging the first two dimensions
data = z.reshape((z.shape[0] * z.shape[1], z.shape[2]))

# Redundant pairs
redundant_pairs = list(map(lambda node:frozenset([node]), nodes))

# Create pairwise clustering matrix
df_pairs = pd.DataFrame(
data=data,
index=pd.Index(list(map(frozenset, itertools.product(nodes,nodes))), name=node_type),
columns=pd.Index(iterations, name=iteration_type),
dtype=int,
).drop(redundant_pairs, axis=0)


return df_pairs[~df_pairs.index.duplicated(keep="first")]

# Temporal Networks
class TemporalNetwork(object):
"""
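
As a quick illustration of the new cluster_modularity helper (a sketch, not part of the commit): given a (nodes x iterations) table of cluster labels, it returns a pairwise matrix indexed by frozenset node pairs whose entries are 1 when the two nodes shared a cluster label in that iteration. The toy labels and the import path below are assumptions.

import pandas as pd
from soothsayer.networks import cluster_modularity

# Toy cluster assignments for 4 nodes across 3 clustering iterations (hypothetical data)
df_labels = pd.DataFrame(
    {0: [0, 0, 1, 1],
     1: [0, 0, 1, 2],
     2: [0, 1, 1, 1]},
    index=["a", "b", "c", "d"],
)

df_pairs = cluster_modularity(df_labels)
print(df_pairs.sum(axis=1))  # how often each node pair co-clustered across iterations
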
Binary file modified soothsayer/r_wrappers/.DS_Store
Binary file modified soothsayer/utils/.DS_Store
18 changes: 17 additions & 1 deletion soothsayer/utils/utils.py
@@ -26,7 +26,8 @@
"rgb_to_rgba", "map_colors", "infer_cmap", "infer_vmin_vmax", "infer_continuous_type", "scalarmapping_from_data", "Chromatic", "create_logfile", "determine_mode_for_logfiles",
"is_dict", "is_rgb_like", "is_nonstring_iterable","is_dict_like", "is_color", "is_graph", "is_all_same_type", "is_number", "is_query_class","is_symmetrical", "is_in_namespace",
"format_mpl_legend_handles", "LEGEND_KWS", "DIVERGING_KWS", "CMAP_DIVERGING","COLOR_NEGATIVE", "COLOR_POSITIVE", "get_coords_contour", "get_coords_centroid", "get_parameters_ellipse", "add_cbar_from_data", "configure_scatter",
"pd_series_collapse", "is_path_like", "pd_series_filter", "pd_dataframe_matmul", "pd_series_to_groupby_to_dataframe","pd_dataframe_query","contains","consecutive_replace", "force_symmetry","range_like","generate_random_sequence","fragment","pd_dataframe_extend_index","is_file_like","get_iris_data","assert_acceptable_arguments","filter_compositional","is_function","Command","get_directory_size","DisplayablePath","join_as_strings",
"pd_series_collapse", "is_path_like", "pd_series_filter", "pd_dataframe_matmul", "pd_series_to_groupby_to_dataframe","pd_dataframe_query","pd_dropduplicates_index", "contains","consecutive_replace", "force_symmetry","range_like","generate_random_sequence","fragment","pd_dataframe_extend_index","is_file_like","get_iris_data","assert_acceptable_arguments","filter_compositional","is_function","Command","get_directory_size","DisplayablePath","join_as_strings",
"get_repr",
]
__all__ = sorted(__all__)

@@ -323,6 +324,13 @@ def consecutive_replace(x:str, *patterns):
x = x.replace(a,b)
return x

# Get repr for custom classes
def get_repr(class_name, instance_name=None, *args):
header = "{}(name = {})".format(class_name, instance_name)
info = format_header(header)
for field in args:
info += "\n\t* {}".format(field)
return info
# ============
# Dictionaries
# ============
@@ -1098,6 +1106,14 @@ def pd_dataframe_extend_index(index_extended, df=None, fill=np.nan, axis=0):
A[:] = np.nan
return pd.concat([df, pd.DataFrame(A, index=df.index, columns=idx_extend)]).fillna(fill)

# Drop duplicates index
def pd_dropduplicates_index(data, keep="first", axis=0):
if axis in {0, None}:
return data[~data.index.duplicated(keep=keep)]
if axis == 1:
data = data.T
return data[~data.index.duplicated(keep=keep)].T

# =======
# Filters
# =======
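
A small sketch (not from the commit) of the new pd_dropduplicates_index utility: for axis=0 it keeps the first or last occurrence of duplicated index labels, and for axis=1 it does the same for duplicated columns by transposing. The toy frame, the import path, and the get_repr arguments below are assumptions used only for illustration.

import pandas as pd
from soothsayer.utils import pd_dropduplicates_index, get_repr

df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], index=["a", "a", "b"], columns=["x", "y"])
print(pd_dropduplicates_index(df, keep="first"))            # keeps the first "a" row and "b"
print(pd_dropduplicates_index(df.T, keep="last", axis=1))   # same idea applied to duplicated columns

print(get_repr("Dataset", "iris", "shape = (150, 4)"))      # formatted multi-line repr string
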
Binary file modified tutorials/.DS_Store
