-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: introduce auto parameter for correlations (#1095)
* feat: introduce discretization capabilities * feat: introduce 'auto' parameter to correlations * docs: make documentation after adding 'auto' option to the correlation metrics * feat: introduce n_bins as a parameter to 'auto' correlation * feat: introduce option for user to change n_bins argument for ‘auto’ correlation
- Loading branch information
1 parent
744a40d
commit 344b176
Showing
16 changed files
with
337 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,5 +19,7 @@ | |
|
||
~Theme.united | ||
~Theme.flatly | ||
~Theme.cosmo | ||
~Theme.simplex | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
|
||
.. autosummary:: | ||
|
||
Auto | ||
Correlation | ||
Cramers | ||
Kendall | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from enum import Enum | ||
from typing import List | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
class DiscretizationType(Enum):
    """Strategies available for forming discretization bins."""

    UNIFORM = "uniform"
    QUANTILE = "quantile"


class Discretizer:
    """Discretize the numerical columns of a pandas dataframe.

    Use this when a continuous variable has to be converted into a
    categorical (binned) one.

    Attributes:
        discretization_type (DiscretizationType): controls how the bins are
            formed. A uniform discretization forms bins of equal width,
            whereas a quantile discretization forms bins of equal size.
            The enum's string values ("uniform" / "quantile") are accepted too.
        n_bins (int): number of bins.
        reset_index (bool): whether to reset the index of the dataframe
            after the discretization.
    """

    def __init__(
        self, method: DiscretizationType, n_bins: int = 10, reset_index: bool = False
    ) -> None:
        self.discretization_type = method
        self.n_bins = n_bins
        self.reset_index = reset_index

    def discretize_dataframe(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``dataframe`` with its numerical columns binned.

        Non-numerical columns are left as they are and the input dataframe
        is not modified.

        Args:
            dataframe (pd.DataFrame): pandas dataframe

        Returns:
            pd.DataFrame: discretized dataframe, with a fresh positional
                index when ``reset_index`` is set.

        Raises:
            ValueError: if the configured discretization type is unsupported.
        """
        discretized_df = dataframe.copy()
        for column in self._get_numerical_columns(dataframe):
            discretized_df.loc[:, column] = self._discretize_column(
                discretized_df[column]
            )

        if self.reset_index:
            return discretized_df.reset_index(drop=True)
        return discretized_df

    def _discretize_column(self, column: pd.Series) -> np.ndarray:
        """Bin a single column according to the configured method.

        Raises:
            ValueError: if the configured discretization type is unsupported.
        """
        try:
            # Accepts both enum members and their string values.
            method = DiscretizationType(self.discretization_type)
        except ValueError:
            # Fail loudly: falling through would return None and silently
            # wipe the column in the caller.
            raise ValueError(
                f"Unsupported discretization type: {self.discretization_type!r}"
            ) from None

        if method == DiscretizationType.QUANTILE:
            return self._descritize_quantile(column)
        return self._descritize_uniform(column)

    def _descritize_quantile(self, column: pd.Series) -> np.ndarray:
        """Return the equal-size (quantile) bin index of every value."""
        return pd.qcut(
            column, q=self.n_bins, labels=False, retbins=False, duplicates="drop"
        ).values

    def _descritize_uniform(self, column: pd.Series) -> np.ndarray:
        """Return the equal-width bin index of every value."""
        return pd.cut(
            column, bins=self.n_bins, labels=False, duplicates="drop"
        ).values

    def _get_numerical_columns(self, dataframe: pd.DataFrame) -> List[str]:
        """Return the names of the columns with a numerical dtype."""
        return dataframe.select_dtypes(include=np.number).columns.tolist()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.