Skip to content

Commit

Permalink
Add tsfresh into optional dependencies (#1246)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Geekman authored Apr 27, 2023
1 parent 40800fa commit 5b9783f
Show file tree
Hide file tree
Showing 20 changed files with 879 additions and 604 deletions.
1 change: 0 additions & 1 deletion .github/workflows/docs-on-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/docs-unstable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,11 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
run: |
poetry install -E "all release jupyter" -vv
poetry run pip install tsfresh==0.19.0
poetry run pip install protobuf==3.20.1
poetry install -E "all release jupyter classification" -vv
- name: Notebook runner
run: |
poetry run python -m scripts.notebook_runner
7 changes: 0 additions & 7 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -89,7 +88,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -129,7 +127,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -169,15 +166,12 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

- name: Install dependencies
run: |
poetry install -E "all tests" -vv
poetry run pip install tsfresh==0.19.0
poetry run pip install protobuf==3.20.1
- name: PyTest ("experimental")
run: |
Expand Down Expand Up @@ -209,7 +203,6 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
- Fix warning during creation of `ResampleWithDistributionTransform` ([#1230](https://github.com/tinkoff-ai/etna/pull/1230))
- Add deep copy for copying attributes of `TSDataset` ([#1241](https://github.com/tinkoff-ai/etna/pull/1241))
-
- Add `tsfresh` to the optional dependencies and remove the instruction to run `pip install tsfresh` manually ([#1246](https://github.com/tinkoff-ai/etna/pull/1246))
-

## [2.0.0] - 2023-04-11
### Added
Expand Down
7 changes: 5 additions & 2 deletions etna/experimental/classification/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from etna.experimental.classification.classification import TimeSeriesBinaryClassifier
from etna.experimental.classification.predictability import PredictabilityAnalyzer
from etna import SETTINGS

if SETTINGS.classification_required:
from etna.experimental.classification.classification import TimeSeriesBinaryClassifier
from etna.experimental.classification.predictability import PredictabilityAnalyzer
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from etna.experimental.classification.feature_extraction.base import BaseTimeSeriesFeatureExtractor
from etna.experimental.classification.feature_extraction.tsfresh import TSFreshFeatureExtractor
from etna.experimental.classification.feature_extraction.weasel import WEASELFeatureExtractor
from etna import SETTINGS

if SETTINGS.classification_required:
from etna.experimental.classification.feature_extraction.base import BaseTimeSeriesFeatureExtractor
from etna.experimental.classification.feature_extraction.tsfresh import TSFreshFeatureExtractor
from etna.experimental.classification.feature_extraction.weasel import WEASELFeatureExtractor
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from etna import SETTINGS

if SETTINGS.tsfresh_required:
if SETTINGS.classification_required:
from tsfresh import extract_features
from tsfresh.feature_extraction.settings import MinimalFCParameters

Expand Down
11 changes: 9 additions & 2 deletions etna/experimental/classification/feature_extraction/weasel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,25 @@

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
from pyts.approximation import SymbolicFourierApproximation
from pyts.transformation import WEASEL
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
from scipy.sparse import hstack
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import chi2
from typing_extensions import Literal

from etna import SETTINGS
from etna.experimental.classification.feature_extraction.base import BaseTimeSeriesFeatureExtractor
from etna.experimental.classification.utils import padd_single_series

if SETTINGS.classification_required:
from pyts.approximation import SymbolicFourierApproximation
from pyts.transformation import WEASEL
else:
from unittest.mock import Mock

WEASEL = Mock # type: ignore


class CustomWEASEL(WEASEL):
"""Improved version of WEASEL transform to work with the series of different length."""
Expand Down
10 changes: 8 additions & 2 deletions etna/libs/tsfresh/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
# Note: Copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/ff69073bbb4df787fcbf277a611c6b40632e767d/tsfresh/defaults.py)
# Note: Copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/v0.20.0/tsfresh/defaults.py)

import os
from multiprocessing import cpu_count

n_cores = int(os.getenv("NUMBER_OF_CPUS") or cpu_count())

CHUNKSIZE = None
N_PROCESSES = 1
N_PROCESSES = max(1, n_cores // 2)
PROFILING = False
PROFILING_SORTING = "cumulative"
PROFILING_FILENAME = "profile.txt"
Expand Down
5 changes: 4 additions & 1 deletion etna/libs/tsfresh/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/v0.20.0/tsfresh/utilities/distribution.py)

import warnings
# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/ff69073bbb4df787fcbf277a611c6b40632e767d/tsfresh/utilities/distribution.py)


def initialize_warnings_in_workers(show_warnings):
"""
Small helper function to initialize warnings module in multiprocessing workers.
Expand Down
15 changes: 8 additions & 7 deletions etna/libs/tsfresh/relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,26 @@
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/v0.20.0/tsfresh/feature_selection/relevance.py)

from multiprocessing import Pool
import warnings
from functools import partial, reduce
from multiprocessing import Pool

import numpy as np
import pandas as pd
from functools import partial, reduce
from statsmodels.stats.multitest import multipletests

from etna.libs.tsfresh import defaults
from etna.libs.tsfresh.significance_tests import (
target_binary_feature_binary_test,
target_binary_feature_real_test,
target_real_feature_binary_test,
target_real_feature_real_test,
target_binary_feature_binary_test,
)
from etna.libs.tsfresh.distribution import initialize_warnings_in_workers

# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/ff69073bbb4df787fcbf277a611c6b40632e767d/tsfresh/feature_selection/relevance.py)

def calculate_relevance_table(
X,
y,
Expand Down Expand Up @@ -199,7 +200,7 @@ def calculate_relevance_table(
else:
warnings.simplefilter("default")

if n_jobs == 0:
if n_jobs == 0 or n_jobs == 1:
map_function = map
else:
pool = Pool(
Expand Down Expand Up @@ -234,7 +235,7 @@ def calculate_relevance_table(
)

if len(table_const) == len(relevance_table):
if n_jobs != 0:
if n_jobs < 0 or n_jobs > 1:
pool.close()
pool.terminate()
pool.join()
Expand Down Expand Up @@ -301,7 +302,7 @@ def calculate_relevance_table(
map_function,
)

if n_jobs != 0:
if n_jobs < 0 or n_jobs > 1:
pool.close()
pool.terminate()
pool.join()
Expand Down
50 changes: 32 additions & 18 deletions etna/libs/tsfresh/significance_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/v0.20.0/tsfresh/feature_selection/significance_tests.py)

import warnings
from builtins import str

import numpy as np
import pandas as pd
from scipy import stats
import warnings

# Note: Originally copied from tsfresh package (https://github.com/blue-yonder/tsfresh/blob/ff69073bbb4df787fcbf277a611c6b40632e767d/tsfresh/feature_selection/significance_tests.py)

def target_binary_feature_binary_test(x, y):
"""
Calculate the feature significance of a binary feature to a binary target as a p-value.
Expand Down Expand Up @@ -56,8 +59,7 @@ def target_binary_feature_binary_test(x, y):
n_y1_x1 = np.sum(y[x == x1] == y1)
n_y0_x1 = len(y[x == x1]) - n_y1_x1

table = np.array([[n_y1_x1, n_y1_x0],
[n_y0_x1, n_y0_x0]])
table = np.array([[n_y1_x1, n_y1_x0], [n_y0_x1, n_y0_x0]])

# Perform the Fisher test
oddsratio, p_value = stats.fisher_exact(table, alternative="two-sided")
Expand Down Expand Up @@ -99,17 +101,21 @@ def target_binary_feature_real_test(x, y, test):
x_y1 = x[y == y1]
x_y0 = x[y == y0]

if test == 'mann':
if test == "mann":
# Perform Mann-Whitney-U test
U, p_mannwhitu = stats.mannwhitneyu(x_y1, x_y0, use_continuity=True, alternative='two-sided')
U, p_mannwhitu = stats.mannwhitneyu(
x_y1, x_y0, use_continuity=True, alternative="two-sided"
)
return p_mannwhitu
elif test == 'smir':
elif test == "smir":
# Perform Kolmogorov-Smirnov test
KS, p_ks = stats.ks_2samp(x_y1, x_y0)
return p_ks
else:
raise ValueError("Please use a valid entry for test_for_binary_target_real_feature. " +
"Valid entries are 'mann' and 'smir'.")
raise ValueError(
"Please use a valid entry for test_for_binary_target_real_feature. "
+ "Valid entries are 'mann' and 'smir'."
)


def target_real_feature_binary_test(x, y):
Expand Down Expand Up @@ -208,9 +214,12 @@ def __check_for_binary_target(y):
if len(set(y)) > 2:
raise ValueError("Target is not binary!")

warnings.warn("The binary target should have "
"values 1 and 0 (or True and False). "
"Instead found" + str(set(y)), RuntimeWarning)
warnings.warn(
"The binary target should have "
"values 1 and 0 (or True and False). "
"Instead found" + str(set(y)),
RuntimeWarning,
)


def __check_for_binary_feature(x):
Expand All @@ -228,11 +237,16 @@ def __check_for_binary_feature(x):
"""
if not set(x) == {0, 1}:
if len(set(x)) > 2:
raise ValueError("[target_binary_feature_binary_test] Feature is not binary!")
raise ValueError(
"[target_binary_feature_binary_test] Feature is not binary!"
)

warnings.warn("A binary feature should have only "
"values 1 and 0 (incl. True and False). "
"Instead found " + str(set(x)) + " in feature ''" + str(x.name) + "''.", RuntimeWarning)
warnings.warn(
"A binary feature should have only "
"values 1 and 0 (incl. True and False). "
"Instead found " + str(set(x)) + " in feature ''" + str(x.name) + "''.",
RuntimeWarning,
)


def _check_for_nans(x, y):
Expand All @@ -245,6 +259,6 @@ def _check_for_nans(x, y):
:raises: `ValueError` if target or feature contains NaNs.
"""
if np.isnan(x.values).any():
raise ValueError('Feature {} contains NaN values'.format(x.name))
raise ValueError("Feature {} contains NaN values".format(x.name))
elif np.isnan(y.values).any():
raise ValueError('Target contains NaN values')
raise ValueError("Target contains NaN values")
19 changes: 9 additions & 10 deletions etna/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,12 @@ def _is_prophet_available():
return False


def _is_tsfresh_available():
if _module_available("tsfresh"):
def _is_classification_available():
true_case = _module_available("pyts") & _module_available("tsfresh")
if true_case:
return True
else:
warnings.warn(
"`tsfresh` is not available, to install it, run `pip install tsfresh==0.19.0 && pip install protobuf==3.20.1`"
)
warnings.warn("etna[classification] is not available, to install it, run `pip install etna[classification]`")
return False


Expand All @@ -83,7 +82,7 @@ def __init__( # noqa: D107
torch_required: Optional[bool] = None,
prophet_required: Optional[bool] = None,
wandb_required: Optional[bool] = None,
tsfresh_required: Optional[bool] = None,
classification_required: Optional[bool] = None,
):
# True – use the package
# None – use the package if available
Expand All @@ -101,10 +100,10 @@ def __init__( # noqa: D107
_is_prophet_available,
"etna[prophet] is not available, to install it, run `pip install etna[prophet]`.",
)
self.tsfresh_required: bool = _get_optional_value(
tsfresh_required,
_is_tsfresh_available,
"`tsfresh` is not available, to install it, run `pip install tsfresh==0.19.0 && pip install protobuf==3.20.1`",
self.classification_required: bool = _get_optional_value(
classification_required,
_is_classification_available,
"etna[classification] is not available, to install it, run `pip install etna[classification]`.",
)

@staticmethod
Expand Down
234 changes: 99 additions & 135 deletions examples/classification.ipynb

Large diffs are not rendered by default.

Loading

1 comment on commit 5b9783f

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.