
Fix some bugs: #134, #136, #162 (#181)
* fix: #162 enforce utf8 config encoding

* chore: resolve #134

* fix: #136 dict string handling

* chore: resolve and raise `FutureWarning`s
lkstrp authored Jul 23, 2024
1 parent 156b9b9 commit 8885715
Showing 8 changed files with 54 additions and 26 deletions.
2 changes: 1 addition & 1 deletion powerplantmatching/cleaning.py
@@ -405,7 +405,7 @@ def aggregate_units(
         for arg in used_deprecated_args:
             kwargs.pop(arg)
         msg = "The following arguments were deprecated and are being ignored: "
-        logger.warn(msg + f"{used_deprecated_args}")
+        logger.warning(msg + f"{used_deprecated_args}")

     df = get_obj_if_Acc(df)

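Most hunks in this commit make the same substitution: `Logger.warn` is a long-deprecated alias of `Logger.warning` in the standard library, and on current Python versions calling it emits a `DeprecationWarning` before forwarding to `warning`. A minimal sketch of the difference (the logger name is illustrative):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("example")  # illustrative name

    logger.warning("preferred spelling")   # the supported API
    logger.warn("deprecated spelling")     # alias; emits a DeprecationWarning
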
12 changes: 6 additions & 6 deletions powerplantmatching/collection.py
@@ -29,7 +29,7 @@
 from .matching import combine_multiple_datasets, reduce_matched_dataframe
 from .utils import (
     parmap,
-    projectID_to_dict,
+    parse_string_to_dict,
     set_column_name,
     to_dict_if_string,
 )
@@ -113,7 +113,7 @@ def df_by_name(name):
         df = pd.read_csv(
             outfn_matched, index_col=0, header=[0, 1], low_memory=False
         )
-        return df.pipe(projectID_to_dict)
+        return df.pipe(parse_string_to_dict, ["projectID", "EIC"])


def powerplants(
@@ -181,9 +181,9 @@ def powerplants(
     used_deprecated_args = deprecated_args.intersection(collection_kwargs.keys())
     if used_deprecated_args:
         msg = "The following arguments were deprecated and are being ignored: "
-        logger.warn(msg + f"{used_deprecated_args}")
+        logger.warning(msg + f"{used_deprecated_args}")
     if extendby_kwargs:
-        logger.warn(
+        logger.warning(
             DeprecationWarning,
             "`extendby_kwargs` is deprecated in the favor of extend_by_kwargs",
         )
@@ -204,7 +204,7 @@
         logger.info(f"Retrieving data from {url}")
         df = (
             pd.read_csv(url, index_col=0)
-            .pipe(projectID_to_dict)
+            .pipe(parse_string_to_dict, ["projectID", "EIC"])
             .pipe(set_column_name, "Matched Data")
         )
         logger.info(f"Store data at {fn}")
@@ -214,7 +214,7 @@
     if not update and os.path.exists(fn):
         df = (
             pd.read_csv(fn, index_col=0, header=header)
-            .pipe(projectID_to_dict)
+            .pipe(parse_string_to_dict, ["projectID", "EIC"])
             .pipe(set_column_name, "Matched Data")
         )
     if extend_by_vres:
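For orientation, the pattern these hunks change: the matched-data CSVs serialize the dict-valued `projectID` and `EIC` columns as strings, and the renamed helper parses them back per column after reading. A usage sketch mirroring the code above (the file name is hypothetical):

    import pandas as pd

    from powerplantmatching.utils import parse_string_to_dict, set_column_name

    df = (
        pd.read_csv("matched_data.csv", index_col=0)  # hypothetical local file
        .pipe(parse_string_to_dict, ["projectID", "EIC"])
        .pipe(set_column_name, "Matched Data")
    )
    df.projectID.iloc[0]  # now a dict, e.g. {'OPSD': [...]}, not its string repr
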
7 changes: 3 additions & 4 deletions powerplantmatching/core.py
@@ -55,8 +55,7 @@ def _data_out(fn, config):

 # Logging: General Settings
 logger = logging.getLogger(__name__)
-logging.basicConfig(level=20)
-logger.setLevel("INFO")
+logger.setLevel(logging.INFO)
 # Logging: File
 logFormatter = logging.Formatter(
     "%(asctime)s [%(threadName)-12.12s] " "[%(levelname)-5.5s] %(message)s"
@@ -100,10 +99,10 @@ def get_config(filename=None, **overrides):
     else:
         custom_config = package_config["custom_config"]

-    with open(base_config) as f:
+    with open(base_config, encoding="utf8") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
     if exists(custom_config):
-        with open(custom_config) as f:
+        with open(custom_config, encoding="utf8") as f:
             config.update(yaml.load(f, Loader=yaml.FullLoader))
     config.update(overrides)

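Two things happen in core.py. Dropping `logging.basicConfig(level=20)` means importing the package no longer configures the root logger (the recommended behaviour for libraries); only the package's own logger level is set. The `encoding="utf8"` arguments implement #162: a bare `open()` falls back to the platform's locale encoding (for example cp1252 on Windows), so a config file containing non-ASCII characters could raise a `UnicodeDecodeError`. A small sketch of the failure mode, with an illustrative file name and key:

    import yaml

    # Written as UTF-8, e.g. by a text editor or another tool:
    with open("custom.yaml", "w", encoding="utf8") as f:  # illustrative path
        f.write("display_name: Überlingen\n")  # hypothetical key, non-ASCII value

    # open("custom.yaml") with no encoding= may fail on non-UTF-8 locales;
    # pinning the encoding makes the read portable:
    with open("custom.yaml", encoding="utf8") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
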
4 changes: 2 additions & 2 deletions powerplantmatching/heuristics.py
@@ -241,7 +241,7 @@ def fill_missing_commissioning_years(df):
     df["DateIn"] = df.DateIn.fillna(df.groupby(["Country"]).DateIn.transform("mean"))
     if df.DateIn.isnull().any():
         count = len(df[df.DateIn.isnull()])
-        logger.warn(
+        logger.warning(
             f"""There are still *{count}* empty values for
             'DateIn' in the DataFrame. These should
             be either be filled manually or dropped.
@@ -621,6 +621,6 @@ def set_known_retire_years(df):
         if name_match_b.any():
             ppl_de_nuc.loc[name_match_b, "YearRetire"] = year
         else:
-            logger.warn(f"'{name}' was not found in given DataFrame.")
+            logger.warning(f"'{name}' was not found in given DataFrame.")
     df.loc[ppl_de_nuc.index, "YearRetire"] = ppl_de_nuc["YearRetire"]
     return df
6 changes: 3 additions & 3 deletions powerplantmatching/matching.py
@@ -82,7 +82,7 @@ def compare_two_datasets(dfs, labels, country_wise=True, config=None, **dukeargs
         for arg in used_deprecated_args:
             dukeargs.pop(arg)
         msg = "The following arguments were deprecated and are being ignored: "
-        logger.warn(msg + f"{used_deprecated_args}")
+        logger.warning(msg + f"{used_deprecated_args}")

     dfs = list(map(read_csv_if_string, dfs))
     if "singlematch" not in dukeargs:
@@ -150,12 +150,12 @@ def cross_matches(sets_of_pairs, labels=None):
         matches = pd.concat([matches, match_base], sort=True)

     if matches is None or matches.empty:
-        logger.warn("No matches found")
+        logger.warning("No matches found")
         return pd.DataFrame(columns=labels)

     if matches.isnull().all().any():
         cols = ", ".join(matches.columns[matches.isnull().all()])
-        logger.warn(f"No matches found for data source {cols}")
+        logger.warning(f"No matches found for data source {cols}")

     matches = matches.drop_duplicates().reset_index(drop=True)
     for label in labels:
4 changes: 2 additions & 2 deletions powerplantmatching/plot.py
@@ -44,7 +44,7 @@
     cartopy_present = False

 if not cartopy_present:
-    logger.warn("Cartopy not existent.")
+    logger.warning("Cartopy not existent.")


 def fueltype_stats(df):
@@ -524,7 +524,7 @@ def calc(n, m):
 #              .fillna(0.0))  # country (if all zero->drop!).
 #
 #     if (show_indicators or threshold >= 0.) and len(stats.columns) < 2:
-#         logger.warn('At least two objects for comparison needed when using '
+#         logger.warning('At least two objects for comparison needed when using '
 #                     '`show_indicators` or `threshold`. Arguments ignored.')
 #         show_indicators = False
 #         threshold = -1
38 changes: 30 additions & 8 deletions powerplantmatching/utils.py
@@ -19,6 +19,7 @@

 import multiprocessing
 import os
+import re
 from ast import literal_eval as liteval

 import country_converter as coco
@@ -270,18 +271,39 @@ def to_dict_if_string(s):
     return s


-def projectID_to_dict(df):
+def parse_string_to_dict(df, cols):
     """
-    Convenience function to convert string of dict to dict type
+    Convenience function to convert string of dict to dict type for specified columns.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame on which to apply the parsing
+    cols : str, list
+        Column(s) to be parsed to dict type
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with specified columns parsed to dict type
     """
-    if df.columns.nlevels > 1:
+    if isinstance(cols, str):
+        cols = [cols]
+
+    def _replace_and_evaluate(value):
+        # Needed to read in older files with {nan} as string
+        value = re.sub(r"\bnan\b(, )?|, \bnan\b", "", value)
+        return liteval(value)
+
+    if isinstance(df.columns, pd.MultiIndex):
         return df.assign(
-            projectID=(
-                df.projectID.stack().dropna().apply(lambda ds: liteval(ds)).unstack()
-            )
+            **{
+                col: df[col].stack().dropna().apply(_replace_and_evaluate).unstack()
+                for col in cols
+            }
         )
     else:
-        return df.assign(projectID=df.projectID.apply(lambda x: liteval(x)))
+        return df.assign(**{col: df[col].apply(_replace_and_evaluate) for col in cols})


def select_by_projectID(df, projectID, dataset_name=None):
@@ -563,7 +585,7 @@ def parse_Geoposition(
             exactly_one=True,
         )
     except geopy.exc.GeocoderQueryError as e:
-        logger.warn(e)
+        logger.warning(e)
         gdata = None

     if gdata is not None:
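The new `_replace_and_evaluate` helper exists because files written by older versions can contain a literal `nan` token inside the serialized dict strings (the `{nan}` case the code comment mentions), which `ast.literal_eval` refuses to parse. A round-trip illustration of the regex, using a made-up projectID value:

    import re
    from ast import literal_eval

    PATTERN = r"\bnan\b(, )?|, \bnan\b"

    for raw in ["{nan}", "{'OPSD': ['OEU0123'], nan}"]:  # 'OEU0123' is made up
        cleaned = re.sub(PATTERN, "", raw)
        print(cleaned, "->", literal_eval(cleaned))
    # prints:
    # {} -> {}
    # {'OPSD': ['OEU0123']} -> {'OPSD': ['OEU0123']}
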
7 changes: 7 additions & 0 deletions pyproject.toml
@@ -14,4 +14,11 @@ select = [
     'UP', # pyupgrade
     'TID', # flake8-tidy-imports
     'NPY', # numpy
 ]
+
+# Pytest settings
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    "error::FutureWarning", # Raise all FutureWarnings as errors
+]
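This ties into the last bullet of the commit message: with `error::FutureWarning`, pytest promotes every `FutureWarning` raised during the test run to an exception, so deprecated usage fails the suite instead of scrolling past in the log. A sketch of the effect on a hypothetical test:

    import warnings

    def test_example():  # hypothetical test; fails under error::FutureWarning
        warnings.warn("this call will change in the next release", FutureWarning)
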
