Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code refactoring #338

Merged
2 commits merged on May 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions intake_esm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,7 @@ def _set_groups_and_keys(self):
internal_keys = self._grouped.groups.keys()
public_keys = []
for key in internal_keys:
if isinstance(key, str):
p_key = key
else:
p_key = self.sep.join(str(v) for v in key)
p_key = key if isinstance(key, str) else self.sep.join(str(v) for v in key)
public_keys.append(p_key)

else:
Expand Down Expand Up @@ -206,14 +203,13 @@ def _get_aggregation_info(self):
for column in columns:
self.df[column] = self.df[column].map(tuple)

aggregation_info = AggregationInfo(
return AggregationInfo(
groupby_attrs,
variable_column_name,
aggregations,
agg_columns,
aggregation_dict,
)
return aggregation_info

def keys(self) -> List:
"""
Expand All @@ -237,10 +233,9 @@ def key_template(self) -> str:
string template used to create catalog entry keys
"""
if self.aggregation_info.groupby_attrs:
template = self.sep.join(self.aggregation_info.groupby_attrs)
return self.sep.join(self.aggregation_info.groupby_attrs)
else:
template = self.sep.join(self.df.columns)
return template
return self.sep.join(self.df.columns)

@property
def df(self) -> pd.DataFrame:
Expand Down Expand Up @@ -525,8 +520,7 @@ def _repr_html_(self):
"""
uniques = pd.DataFrame(self.nunique(), columns=['unique'])
text = uniques._repr_html_()
output = f'<p><strong>{self.esmcol_data["id"]} catalog with {len(self)} dataset(s) from {len(self.df)} asset(s)</strong>:</p> {text}'
return output
return f'<p><strong>{self.esmcol_data["id"]} catalog with {len(self)} dataset(s) from {len(self.df)} asset(s)</strong>:</p> {text}'

def _ipython_display_(self):
"""
Expand Down Expand Up @@ -760,9 +754,7 @@ def nunique(self) -> pd.Series:
"""

uniques = self.unique(self.df.columns.tolist())
nuniques = {}
for key, val in uniques.items():
nuniques[key] = val['count']
nuniques = {key: val['count'] for key, val in uniques.items()}
return pd.Series(nuniques)

def unique(self, columns: Union[str, List] = None) -> Dict[str, Any]:
Expand Down
5 changes: 1 addition & 4 deletions intake_esm/merge_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,7 @@ def union(
def _to_nested_dict(df):
"""Converts a multiindex series to nested dict"""
if hasattr(df.index, 'levels') and len(df.index.levels) > 1:
ret = {}
for k, v in df.groupby(level=0):
ret[k] = _to_nested_dict(v.droplevel(0))
return ret
return {k: _to_nested_dict(v.droplevel(0)) for k, v in df.groupby(level=0)}
return df.to_dict()


Expand Down
22 changes: 9 additions & 13 deletions intake_esm/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ def _unique(df, columns=None):

def _find_unique(series):
values = series.dropna().values
uniques = list(set(_flatten_list(values)))
return uniques
return list(set(_flatten_list(values)))

x = df[columns].apply(_find_unique, result_type='reduce').to_dict()
info = {}
Expand Down Expand Up @@ -98,7 +97,7 @@ def search(df, require_all_on=None, **query):
if index == condition:
results.append(group)

if len(results) >= 1:
if results:
return pd.concat(results).reset_index(drop=True)

warn(message)
Expand Down Expand Up @@ -133,18 +132,15 @@ def _is_pattern(value):
def _flatten_list(data):
for item in data:
if isinstance(item, Iterable) and not isinstance(item, str):
for x in _flatten_list(item):
yield x
yield from _flatten_list(item)
else:
yield item


def _get_columns_with_iterables(df):
if not df.empty:
has_iterables = (
df.sample(20, replace=True).applymap(type).isin([list, tuple, set]).any().to_dict()
)
columns_with_iterables = [column for column, check in has_iterables.items() if check]
else:
columns_with_iterables = []
return columns_with_iterables
if df.empty:
return []
has_iterables = (
df.sample(20, replace=True).applymap(type).isin([list, tuple, set]).any().to_dict()
)
return [column for column, check in has_iterables.items() if check]
6 changes: 2 additions & 4 deletions intake_esm/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ def __init__(

def __repr__(self):
"""Make string representation of object."""
contents = f'<name: {self.name}, assets: {len(self.df)}'
return contents
return f'<name: {self.name}, assets: {len(self.df)}'

def _ipython_display_(self):
"""
Expand Down Expand Up @@ -254,8 +253,7 @@ def close(self):

def create_nested_dict(df, path_column, aggregation_columns):
mi = df.set_index(aggregation_columns)
nd = _to_nested_dict(mi[path_column])
return nd
return _to_nested_dict(mi[path_column])


def _sanitize_aggregations(df, aggregation_dict):
Expand Down