From 013d7eee3c46f0f0c0f66ab7eac9dd1945faf101 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 31 Aug 2022 18:32:01 +0200 Subject: [PATCH] Louquinze development (#1578) * create new text preprocessing cs * create new text preprocessing cs * set new defaults for text encoding * set new defaults for text encoding * set new defaults for text encoding * Fix bug, rework tests Co-authored-by: lukas --- .../data_preprocessing/feature_type.py | 4 +- .../feature_type_categorical.py | 3 - .../feature_type_numerical.py | 4 - .../text_encoding/__init__.py | 2 +- .../text_encoding/bag_of_word_encoding.py | 134 ------ .../bag_of_word_encoding_distinct.py | 140 ------ .../text_encoding/tfidf_encoding.py | 212 ++++++--- .../text_feature_reduction/truncated_svd.py | 10 +- .../test_meta_base_data/configurations.csv | 258 +++++------ .../test_metalearning_configuration.py | 24 +- .../test_data_preprocessing_feat_type.py | 21 +- .../test_data_preprocessing_text.py | 411 ++++++++++++------ test/test_pipeline/test_classification.py | 4 +- test/test_pipeline/test_regression.py | 2 +- 14 files changed, 584 insertions(+), 645 deletions(-) delete mode 100644 autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py delete mode 100644 autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 11085a1f74..7f52b07dbc 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -267,7 +267,9 @@ def set_hyperparameters( sub_config_dict[new_name] = value sub_configuration = Configuration( - sub_configuration_space, values=sub_config_dict + sub_configuration_space, + values=sub_config_dict, + allow_inactive_with_values=True, ) sub_init_params_dict: Optional[Dict[str, Any]] = 
None diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index 07cfeb7fa5..dbe8adca4a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -33,12 +33,10 @@ class CategoricalPreprocessingPipeline(BasePipeline): 3 - Minority coalescence: Assign category 1 to all categories whose occurrence don't sum-up to a certain minimum fraction 4 - One hot encoding: usual sklearn one hot encoding - Parameters ---------- config : ConfigSpace.configuration_space.Configuration The configuration to evaluate. - random_state : Optional[int | RandomState] If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; @@ -101,7 +99,6 @@ def _get_hyperparameter_search_space( dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: """Create the hyperparameter configuration space. - Returns ------- cs : ConfigSpace.configuration_space.Configuration diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py index 5cc3f19561..1344ac8a96 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py @@ -26,12 +26,10 @@ class NumericalPreprocessingPipeline(BasePipeline): 2 - VarianceThreshold: Removes low-variance features 3 - Rescaling: rescale features according to a certain rule (e.g. normalization, standartization or min-max) - Parameters ---------- config : ConfigSpace.configuration_space.Configuration The configuration to evaluate. 
- random_state : Optional[int | RandomState] If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; @@ -94,10 +92,8 @@ def _get_hyperparameter_search_space( dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: """Create the hyperparameter configuration space. - Parameters ---------- - Returns ------- cs : ConfigSpace.configuration_space.Configuration diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 75c173e181..f858781997 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -63,7 +63,7 @@ def get_hyperparameter_search_space( ) if default is None: - defaults = ["bag_of_words_encoding"] + defaults = ["tfidf_encoding"] for default_ in defaults: if default_ in available_preprocessors: default = default_ diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py deleted file mode 100644 index a90b1c1fa4..0000000000 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py +++ /dev/null @@ -1,134 +0,0 @@ -from typing import Dict, Optional, Tuple, Union - -import itertools - -import ConfigSpace.hyperparameters as CSH -import numpy as np -import pandas as pd -from ConfigSpace import EqualsCondition -from ConfigSpace.configuration_space import ConfigurationSpace -from sklearn.feature_extraction.text import CountVectorizer - -from autosklearn.askl_typing import FEAT_TYPE_TYPE -from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from 
autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA - - -class BagOfWordEncoder(AutoSklearnPreprocessingAlgorithm): - def __init__( - self, - ngram_upper_bound: int = 1, - min_df_choice: str = "min_df_absolute", - min_df_absolute: int = 0, - min_df_relative: float = 0.01, - random_state: Optional[Union[int, np.random.RandomState]] = None, - ) -> None: - self.ngram_upper_bound = ngram_upper_bound - self.random_state = random_state - self.min_df_choice = min_df_choice - self.min_df_absolute = min_df_absolute - self.min_df_relative = min_df_relative - - def fit( - self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> "BagOfWordEncoder": - - if not isinstance(X, pd.DataFrame): - raise ValueError( - "Your text data is not encoded in a pandas.DataFrame\n" - "Please make sure to use a pandas.DataFrame and ensure" - "that the text features are encoded as strings." - ) - - X.fillna("", inplace=True) - - # define a CountVectorizer for used on every feature - if self.min_df_choice == "min_df_absolute": - self.preprocessor = CountVectorizer( - min_df=self.min_df_absolute, - ngram_range=(1, self.ngram_upper_bound), - ) - - elif self.min_df_choice == "min_df_relative": - self.preprocessor = CountVectorizer( - min_df=self.min_df_relative, - ngram_range=(1, self.ngram_upper_bound), - ) - - else: - raise KeyError() - - all_text = itertools.chain.from_iterable(X[col] for col in X.columns) - self.preprocessor = self.preprocessor.fit(all_text) - - return self - - def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: - X.fillna("", inplace=True) - X_transformed = None - if self.preprocessor is None: - raise NotImplementedError() - for feature in X.columns: - if X_transformed is None: - X_transformed = self.preprocessor.transform(X[feature]) - else: - X_transformed += self.preprocessor.transform(X[feature]) - return X_transformed - - @staticmethod - def get_properties( - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = 
None, - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return { - "shortname": "BOW", - "name": "Bag Of Word Encoder", - "handles_regression": True, - "handles_classification": True, - "handles_multiclass": True, - "handles_multilabel": True, - "handles_multioutput": True, - "handles_sparse": True, - "handles_dense": True, - "input": (DENSE, SPARSE, UNSIGNED_DATA), - "output": (INPUT,), - } - - @staticmethod - def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - ) -> ConfigurationSpace: - cs = ConfigurationSpace() - hp_ngram_upper_bound = CSH.UniformIntegerHyperparameter( - name="ngram_upper_bound", lower=1, upper=3, default_value=1 - ) - hp_min_df_choice_bow = CSH.CategoricalHyperparameter( - "min_df_choice", choices=["min_df_absolute", "min_df_relative"] - ) - hp_min_df_absolute_bow = CSH.UniformIntegerHyperparameter( - name="min_df_absolute", lower=0, upper=10, default_value=0 - ) - hp_min_df_relative_bow = CSH.UniformFloatHyperparameter( - name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True - ) - cs.add_hyperparameters( - [ - hp_ngram_upper_bound, - hp_min_df_choice_bow, - hp_min_df_absolute_bow, - hp_min_df_relative_bow, - ] - ) - - cond_min_df_absolute_bow = EqualsCondition( - hp_min_df_absolute_bow, hp_min_df_choice_bow, "min_df_absolute" - ) - cond_min_df_relative_bow = EqualsCondition( - hp_min_df_relative_bow, hp_min_df_choice_bow, "min_df_relative" - ) - cs.add_conditions([cond_min_df_absolute_bow, cond_min_df_relative_bow]) - - # maybe add bigrams ... 
- - return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py deleted file mode 100644 index de852b5d6b..0000000000 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py +++ /dev/null @@ -1,140 +0,0 @@ -from typing import Dict, Optional, Tuple, Union - -import ConfigSpace.hyperparameters as CSH -import numpy as np -import pandas as pd -from ConfigSpace import EqualsCondition -from ConfigSpace.configuration_space import ConfigurationSpace -from scipy.sparse import hstack -from sklearn.feature_extraction.text import CountVectorizer - -from autosklearn.askl_typing import FEAT_TYPE_TYPE -from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA - - -class BagOfWordEncoder(AutoSklearnPreprocessingAlgorithm): - def __init__( - self, - ngram_upper_bound: int = 1, - min_df_choice: str = "min_df_absolute", - min_df_absolute: int = 0, - min_df_relative: float = 0.01, - random_state: Optional[Union[int, np.random.RandomState]] = None, - ) -> None: - self.ngram_upper_bound = ngram_upper_bound - self.random_state = random_state - self.min_df_choice = min_df_choice - self.min_df_absolute = min_df_absolute - self.min_df_relative = min_df_relative - - def fit( - self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> "BagOfWordEncoder": - - if isinstance(X, pd.DataFrame): - X.fillna("", inplace=True) - if self.min_df_choice == "min_df_absolute": - - self.preprocessor = {} - - for feature in X.columns: - vectorizer = CountVectorizer( - min_df=self.min_df_absolute, - ngram_range=(1, self.ngram_upper_bound), - ).fit(X[feature]) - self.preprocessor[feature] = vectorizer 
- - elif self.min_df_choice == "min_df_relative": - - self.preprocessor = {} - - for feature in X.columns: - vectorizer = CountVectorizer( - min_df=self.min_df_relative, - ngram_range=(1, self.ngram_upper_bound), - ).fit(X[feature]) - self.preprocessor[feature] = vectorizer - else: - raise KeyError() - else: - raise ValueError( - "Your text data is not encoded in a pandas.DataFrame\n" - "Please make sure to use a pandas.DataFrame and ensure" - "that the text features are encoded as strings." - ) - return self - - def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: - X.fillna("", inplace=True) - X_new = None - if self.preprocessor is None: - raise NotImplementedError() - - for feature in self.preprocessor: - # the names in the dataframe must not change - if X_new is None: - X_new = self.preprocessor[feature].transform(X[feature]) - else: - X_transformed = self.preprocessor[feature].transform(X[feature]) - X_new = hstack([X_new, X_transformed]) - - return X_new - - @staticmethod - def get_properties( - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return { - "shortname": "BOW", - "name": "Bag Of Word Encoder", - "handles_regression": True, - "handles_classification": True, - "handles_multiclass": True, - "handles_multilabel": True, - "handles_multioutput": True, - "handles_sparse": True, - "handles_dense": True, - "input": (DENSE, SPARSE, UNSIGNED_DATA), - "output": (INPUT,), - } - - @staticmethod - def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - ) -> ConfigurationSpace: - cs = ConfigurationSpace() - hp_ngram_upper_bound = CSH.UniformIntegerHyperparameter( - name="ngram_upper_bound", lower=1, upper=3, default_value=1 - ) - hp_min_df_choice_bow = CSH.CategoricalHyperparameter( - "min_df_choice", choices=["min_df_absolute", "min_df_relative"] - ) - hp_min_df_absolute_bow = 
CSH.UniformIntegerHyperparameter( - name="min_df_absolute", lower=0, upper=10, default_value=0 - ) - hp_min_df_relative_bow = CSH.UniformFloatHyperparameter( - name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True - ) - cs.add_hyperparameters( - [ - hp_ngram_upper_bound, - hp_min_df_choice_bow, - hp_min_df_absolute_bow, - hp_min_df_relative_bow, - ] - ) - - cond_min_df_absolute_bow = EqualsCondition( - hp_min_df_absolute_bow, hp_min_df_choice_bow, "min_df_absolute" - ) - cond_min_df_relative_bow = EqualsCondition( - hp_min_df_relative_bow, hp_min_df_choice_bow, "min_df_relative" - ) - cs.add_conditions([cond_min_df_absolute_bow, cond_min_df_relative_bow]) - - # maybe add bigrams ... - - return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py index 3956ec9eff..39fa39265d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py @@ -7,87 +7,125 @@ import pandas as pd from ConfigSpace import EqualsCondition from ConfigSpace.configuration_space import ConfigurationSpace +from scipy.sparse import hstack from sklearn.feature_extraction.text import TfidfVectorizer from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +from autosklearn.util.common import check_for_bool class TfidfEncoder(AutoSklearnPreprocessingAlgorithm): def __init__( self, - ngram_upper_bound: int = 1, - use_idf: bool = True, - min_df_choice: str = "min_df_absolute", - min_df_absolute: int = 0, - min_df_relative: float = 0.01, + ngram_range_word: int = 1, + ngram_range_char: int = 4, + 
min_df: float = 0.0, + max_df: float = 1.0, + binary: bool = False, + norm: str = "l2", + sublinear_tf: bool = False, + per_column: bool = False, + analyzer: str = "char", random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: - self.ngram_upper_bound = ngram_upper_bound self.random_state = random_state - self.use_idf = use_idf - self.min_df_choice = min_df_choice - self.min_df_absolute = min_df_absolute - self.min_df_relative = min_df_relative + self.min_df = min_df + self.max_df = max_df + self.binary = binary + self.norm = norm + self.sublinear_tf = sublinear_tf + self.per_column = per_column + self.analyzer = analyzer + self.ngram_range_word = ngram_range_word + self.ngram_range_char = ngram_range_char def fit( - self, - X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None, + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None ) -> "TfidfEncoder": + if self.analyzer == "word": + ngram_range = self.ngram_range_word + elif self.analyzer == "char": + ngram_range = self.ngram_range_char + else: + raise KeyError(f"Analyzer is not defined for {self.analyzer}") + + self.sublinear_tf = check_for_bool(self.sublinear_tf) + self.binary = check_for_bool(self.binary) + + if isinstance(X, pd.DataFrame): + X.fillna("", inplace=True) + if self.per_column: + self.preprocessor = {} + + for feature in X.columns: + vectorizer = TfidfVectorizer( + ngram_range=(ngram_range, ngram_range), + min_df=self.min_df, + max_df=self.max_df, + binary=self.binary, + norm=self.norm, + sublinear_tf=self.sublinear_tf, + analyzer=self.analyzer, + ).fit(X[feature]) + self.preprocessor[feature] = vectorizer + else: + self.preprocessor = TfidfVectorizer( + ngram_range=(ngram_range, ngram_range), + min_df=self.min_df, + max_df=self.max_df, + binary=self.binary, + norm=self.norm, + sublinear_tf=self.sublinear_tf, + analyzer=self.analyzer, + ) + all_text = itertools.chain.from_iterable(X[col] for col in X.columns) + self.preprocessor = 
self.preprocessor.fit(all_text) - if not isinstance(X, pd.DataFrame): + else: raise ValueError( "Your text data is not encoded in a pandas.DataFrame\n" "Please make sure to use a pandas.DataFrame and ensure" - " that the text features are encoded as strings." - ) - - X.fillna("", inplace=True) - - if self.min_df_choice == "min_df_absolute": - self.preprocessor = TfidfVectorizer( - min_df=self.min_df_absolute, - use_idf=self.use_idf, - ngram_range=(1, self.ngram_upper_bound), + "that the text features are encoded as strings." ) - - elif self.min_df_choice == "min_df_relative": - self.preprocessor = TfidfVectorizer( - min_df=self.min_df_relative, - use_idf=self.use_idf, - ngram_range=(1, self.ngram_upper_bound), - ) - - else: - raise KeyError() - - all_text = itertools.chain.from_iterable(X[col] for col in X.columns) - self.preprocessor = self.preprocessor.fit(all_text) - return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: X.fillna("", inplace=True) - X_transformed = None - if self.preprocessor is None: - raise NotImplementedError() - for feature in X.columns: - if X_transformed is None: - X_transformed = self.preprocessor.transform(X[feature]) - else: - X_transformed += self.preprocessor.transform(X[feature]) - return X_transformed + if self.per_column: + X_new = None + if self.preprocessor is None: + raise NotImplementedError() + + for feature in self.preprocessor: + # the names in the dataframe must not change + if X_new is None: + X_new = self.preprocessor[feature].transform(X[feature]) + else: + X_transformed = self.preprocessor[feature].transform(X[feature]) + X_new = hstack([X_new, X_transformed]) + + return X_new + else: + X_transformed = None + if self.preprocessor is None: + raise NotImplementedError() + for feature in X.columns: + if X_transformed is None: + X_transformed = self.preprocessor.transform(X[feature]) + else: + X_transformed += self.preprocessor.transform(X[feature]) + return X_transformed @staticmethod def 
get_properties( dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: return { - "shortname": "RBOW", - "name": "Relative Bag Of Word Encoder", + "shortname": "TF/IDF", + "name": "TF/IDF Encoder", "handles_regression": True, "handles_classification": True, "handles_multiclass": True, @@ -105,37 +143,67 @@ def get_hyperparameter_search_space( dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() - hp_ngram_upper_bound = CSH.UniformIntegerHyperparameter( - name="ngram_upper_bound", lower=1, upper=3, default_value=1 + + hp_analyzer = CSH.CategoricalHyperparameter( + name="analyzer", + choices=["word", "char"], + default_value="char", ) - hp_use_idf = CSH.CategoricalHyperparameter("use_idf", choices=[False, True]) - hp_min_df_choice = CSH.CategoricalHyperparameter( - "min_df_choice", choices=["min_df_absolute", "min_df_relative"] + + hp_ngram_range_char = CSH.UniformIntegerHyperparameter( + name="ngram_range_char", + lower=2, + upper=4, + default_value=4, ) - hp_min_df_absolute = CSH.UniformIntegerHyperparameter( - name="min_df_absolute", lower=0, upper=10, default_value=0 + + hp_ngram_range_word = CSH.UniformIntegerHyperparameter( + name="ngram_range_word", + lower=1, + upper=3, + default_value=1, ) - hp_min_df_relative = CSH.UniformFloatHyperparameter( - name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True + + hp_min_df = CSH.UniformFloatHyperparameter( + name="min_df", + lower=0.0, + upper=0.3, + default_value=0.0, ) - cs.add_hyperparameters( - [ - hp_ngram_upper_bound, - hp_use_idf, - hp_min_df_choice, - hp_min_df_absolute, - hp_min_df_relative, - ] + + hp_max_df = CSH.UniformFloatHyperparameter( + name="max_df", lower=0.7, upper=1.0, default_value=1.0 ) - cond_min_df_absolute = EqualsCondition( - hp_min_df_absolute, hp_min_df_choice, "min_df_absolute" + hp_binary = CSH.UnParametrizedHyperparameter(name="binary", 
value="False") + + hp_norm = CSH.CategoricalHyperparameter( + name="norm", choices=["l2", "l1"], default_value="l2" + ) + + hp_sublinear_tf = CSH.UnParametrizedHyperparameter( + name="sublinear_tf", value="False" ) - cond_min_df_relative = EqualsCondition( - hp_min_df_relative, hp_min_df_choice, "min_df_relative" + + hp_per_column = CSH.CategoricalHyperparameter( + name="per_column", choices=[True, False], default_value=False ) - cs.add_conditions([cond_min_df_absolute, cond_min_df_relative]) - # maybe add bigrams ... + cs.add_hyperparameters( + [ + hp_analyzer, + hp_ngram_range_char, + hp_ngram_range_word, + hp_max_df, + hp_min_df, + hp_binary, + hp_norm, + hp_sublinear_tf, + hp_per_column, + ] + ) + cond_char = EqualsCondition(hp_ngram_range_char, hp_analyzer, "char") + cond_word = EqualsCondition(hp_ngram_range_word, hp_analyzer, "word") + cs.add_conditions([cond_char, cond_word]) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py index d6380e03dd..1d8ce91c49 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py @@ -31,22 +31,28 @@ def fit( self.preprocessor = TruncatedSVD( n_components=self.n_components, random_state=self.random_state ) + self.preprocessor.fit(X) elif X.shape[1] <= self.n_components and X.shape[1] != 1: self.preprocessor = TruncatedSVD( n_components=X.shape[1] - 1, random_state=self.random_state ) + self.preprocessor.fit(X) + elif X.shape[1] == 1: + self.preprocessor = "passthrough" else: raise ValueError( "The text embedding consists only of a single dimension.\n" "Are you sure that your text data is necessary?" 
) - self.preprocessor.fit(X) return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: if self.preprocessor is None: raise NotImplementedError() - return self.preprocessor.transform(X) + elif self.preprocessor == "passthrough": + return X + else: + return self.preprocessor.transform(X) @staticmethod def get_properties( diff --git a/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv b/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv index 36263d3333..506d45112b 100755 --- a/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv +++ b/test/test_metalearning/pyMetaLearn/test_meta_base_data/configurations.csv @@ -1,129 +1,129 @@ -idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,classifier:adaboost:learning_rate,classifier:adaboost:max_depth,classifier:adaboost:n_estimators,classifier:bernoulli_nb:alpha,classifier:bernoulli_nb:fit_prior,classifier:decision_tree:criterion,classifier:decision_tree:max_depth_factor,classifier:decision_tree:max_features,classifier:decision_tree:max_leaf_nodes,classifier:decision_tree:min_impurity_decrease,classifier:decision_tree:min_samples_leaf,classifier:decision_tree:min_samples_split,classifier:decision_tree:min_weight_fraction_leaf,classifier:extra_trees:bootstrap,classifier:extra_trees:criterion,classifier:extra_trees:max_depth,classifier:extra_trees:max_features,classifier:extra_trees:max_leaf_nodes,classifier:extra_trees:min_impurity_decrease,classifier:extra_trees:min_samples_leaf,classifier:extra_trees:min_samples_split,classifier:extra_trees:min_weight_fraction_leaf,classifier:gradient_boosting:early_stop,classifier:gradient_boosting:l2_regularization,classifier:gradient_boosting:learning_rate,classifier:gradient_boosting:loss,classifier:gradient_boosting:max_bins,classifier:gradient_boosting:max_depth,classifier:gradient_boosting:max_leaf_nodes,classifier:gradient_boosting:min_samples_leaf,classifier:gradient_boosti
ng:n_iter_no_change,classifier:gradient_boosting:scoring,classifier:gradient_boosting:tol,classifier:gradient_boosting:validation_fraction,classifier:k_nearest_neighbors:n_neighbors,classifier:k_nearest_neighbors:p,classifier:k_nearest_neighbors:weights,classifier:lda:shrinkage,classifier:lda:shrinkage_factor,classifier:lda:tol,classifier:liblinear_svc:C,classifier:liblinear_svc:dual,classifier:liblinear_svc:fit_intercept,classifier:liblinear_svc:intercept_scaling,classifier:liblinear_svc:loss,classifier:liblinear_svc:multi_class,classifier:liblinear_svc:penalty,classifier:liblinear_svc:tol,classifier:libsvm_svc:C,classifier:libsvm_svc:coef0,classifier:libsvm_svc:degree,classifier:libsvm_svc:gamma,classifier:libsvm_svc:kernel,classifier:libsvm_svc:max_iter,classifier:libsvm_svc:shrinking,classifier:libsvm_svc:tol,classifier:multinomial_nb:alpha,classifier:multinomial_nb:fit_prior,classifier:passive_aggressive:C,classifier:passive_aggressive:average,classifier:passive_aggressive:fit_intercept,classifier:passive_aggressive:loss,classifier:passive_aggressive:tol,classifier:qda:reg_param,classifier:random_forest:bootstrap,classifier:random_forest:criterion,classifier:random_forest:max_depth,classifier:random_forest:max_features,classifier:random_forest:max_leaf_nodes,classifier:random_forest:min_impurity_decrease,classifier:random_forest:min_samples_leaf,classifier:random_forest:min_samples_split,classifier:random_forest:min_weight_fraction_leaf,classifier:sgd:alpha,classifier:sgd:average,classifier:sgd:epsilon,classifier:sgd:eta0,classifier:sgd:fit_intercept,classifier:sgd:l1_ratio,classifier:sgd:learning_rate,classifier:sgd:loss,classifier:sgd:penalty,classifier:sgd:power_t,classifier:sgd:tol,data_preprocessor:feature_type:categorical_transformer:categorical_encoding:__choice__,data_preprocessor:feature_type:categorical_transformer:category_coalescence:__choice__,data_preprocessor:feature_type:categorical_transformer:category_coalescence:minority_coalescer:minimum_fra
ction,data_preprocessor:feature_type:numerical_transformer:imputation:strategy,data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__,data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles,data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution,data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_max,data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_min,feature_preprocessor:__choice__,feature_preprocessor:extra_trees_preproc_for_classification:bootstrap,feature_preprocessor:extra_trees_preproc_for_classification:criterion,feature_preprocessor:extra_trees_preproc_for_classification:max_depth,feature_preprocessor:extra_trees_preproc_for_classification:max_features,feature_preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes,feature_preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_leaf,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_split,feature_preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf,feature_preprocessor:extra_trees_preproc_for_classification:n_estimators,feature_preprocessor:fast_ica:algorithm,feature_preprocessor:fast_ica:fun,feature_preprocessor:fast_ica:n_components,feature_preprocessor:fast_ica:whiten,feature_preprocessor:feature_agglomeration:affinity,feature_preprocessor:feature_agglomeration:linkage,feature_preprocessor:feature_agglomeration:n_clusters,feature_preprocessor:feature_agglomeration:pooling_func,feature_preprocessor:kernel_pca:coef0,feature_preprocessor:kernel_pca:degree,feature_preprocessor:kernel_pca:gamma,feature_preprocessor:kernel_pca:kernel,feature_preprocessor:kernel_pca:n_components,feature_preprocessor:kitchen_sinks:gamma,feature_preprocessor:kitchen_sinks:n_components,feature_preprocessor:liblinear_svc_prepro
cessor:C,feature_preprocessor:liblinear_svc_preprocessor:dual,feature_preprocessor:liblinear_svc_preprocessor:fit_intercept,feature_preprocessor:liblinear_svc_preprocessor:intercept_scaling,feature_preprocessor:liblinear_svc_preprocessor:loss,feature_preprocessor:liblinear_svc_preprocessor:multi_class,feature_preprocessor:liblinear_svc_preprocessor:penalty,feature_preprocessor:liblinear_svc_preprocessor:tol,feature_preprocessor:nystroem_sampler:coef0,feature_preprocessor:nystroem_sampler:degree,feature_preprocessor:nystroem_sampler:gamma,feature_preprocessor:nystroem_sampler:kernel,feature_preprocessor:nystroem_sampler:n_components,feature_preprocessor:pca:keep_variance,feature_preprocessor:pca:whiten,feature_preprocessor:polynomial:degree,feature_preprocessor:polynomial:include_bias,feature_preprocessor:polynomial:interaction_only,feature_preprocessor:random_trees_embedding:bootstrap,feature_preprocessor:random_trees_embedding:max_depth,feature_preprocessor:random_trees_embedding:max_leaf_nodes,feature_preprocessor:random_trees_embedding:min_samples_leaf,feature_preprocessor:random_trees_embedding:min_samples_split,feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf,feature_preprocessor:random_trees_embedding:n_estimators,feature_preprocessor:select_percentile_classification:percentile,feature_preprocessor:select_percentile_classification:score_func,feature_preprocessor:select_rates_classification:alpha,feature_preprocessor:select_rates_classification:mode,feature_preprocessor:select_rates_classification:score_func,data_preprocessor:__choice__,data_preprocessor:feature_type:text_transformer:text_feature_reduction:n_components,data_preprocessor:feature_type:text_transformer:text_encoding:__choice__,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding:min_df_absolute,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding:min_df_choice,data_preprocessor:feature_type:text_transformer:text_encodi
ng:bag_of_word_encoding:min_df_relative,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding_distinct:min_df_absolute,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding_distinct:min_df_choice,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding_distinct:min_df_relative,data_preprocessor:feature_type:text_transformer:text_encoding:tfidf_encoding:min_df_absolute,data_preprocessor:feature_type:text_transformer:text_encoding:tfidf_encoding:min_df_choice,data_preprocessor:feature_type:text_transformer:text_encoding:tfidf_encoding:min_df_relative,data_preprocessor:feature_type:text_transformer:text_encoding:tfidf_encoding:use_idf,data_preprocessor:feature_type:text_transformer:text_encoding:bag_of_word_encoding:ngram_upper_bound -1,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.006832691101653281,0.0990420448281782,auto,255,None,58,9,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.047607909209835673,most_frequent,normalize,,,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,72,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -2,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.215660250704945e-08,0.0568967527929491,auto,255,None,74,58,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3999502319254789,fwe,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-3,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7811236762098946,None,0.0,15,9,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,extra_trees_preproc_for_classification,False,entropy,None,0.2269858618750471,None,0.0,13,12,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -4,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5804208006044023,None,0.0,5,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.061500733991527654,fdr,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -5,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -6,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,683.603209772402,-0.7761786661778607,4,1.0146245161392977,poly,-1,True,0.0004729761062000146,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07556779791699596,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46651479293540027,fdr,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-7,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.993647974710288,-0.09714179076410145,2,0.10000000000000006,poly,-1,True,0.0011475566557439987,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.8124421960026027,0.18251138129426106,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -8,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.31482574716831474,None,0.0,15,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,False,True,1,squared_hinge,ovr,l1,5.5234897124903465e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -9,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.8932965286370729,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -10,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.7447413257016613,None,0.0,1,17,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.35543776610807626,most_frequent,standardize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -11,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1160.8950120742215,0.46804236914612085,3,0.334160333047632,poly,-1,False,2.634522104055978e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.00045750613705912717,median,standardize,,,,,fast_ica,,,,,,,,,,,parallel,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-12,weighting,bernoulli_nb,,,,,8.25565461859145,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,extra_trees_preproc_for_classification,True,gini,None,0.7706131584628054,None,0.0,17,6,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -13,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.8723691924170074,None,0.0,16,4,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.7753213990353444,0.23761031439263935,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.3362359945406512,False,True,1,squared_hinge,ovr,l1,7.811811240118101e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -14,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00039927077813935847,True,,0.0002226431182528295,True,,constant,log,l1,,2.3026724800524452e-05,one_hot_encoding,minority_coalescer,0.0005751185552832477,mean,minmax,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -15,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.591176484799245e-05,True,True,squared_hinge,0.0017562521044689225,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.06807526502411573,median,robust_scaler,,,0.9384996302050278,0.23621961410324244,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,6.83683853040988,False,True,1,squared_hinge,ovr,l1,0.00012606991329428092,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-16,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6972483216914105,None,0.0,1,19,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.014621392543150346,mean,robust_scaler,,,0.748827539467177,0.25,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,0.8574750868407579,False,True,1,squared_hinge,ovr,l1,0.00010000000000000009,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -17,none,adaboost,SAMME,0.07484110574206897,4,344,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.12052203362859185,most_frequent,minmax,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6036566748728293,False,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -18,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -19,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.24381781930497387,None,0.0,2,3,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0031003006443551303,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -20,weighting,adaboost,SAMME,0.8984989505050309,8,296,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.11018798131108497,median,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,86.11171751703243,mutual_info,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-21,weighting,extra_trees,,,,,,,,,,,,,,,True,gini,None,0.5634323443830136,None,0.0,6,13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.010000000000000004,mean,quantile_transformer,1046,normal,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -22,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4158372160789875,None,0.0,2,14,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,robust_scaler,,,0.7861547674423665,0.2590888834784356,extra_trees_preproc_for_classification,True,gini,None,0.3820313482020813,None,0.0,1,16,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -23,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.6996707221595181,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00018281165725763588,mean,standardize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,ward,22,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -24,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.14699663235614766,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010362166964374616,most_frequent,quantile_transformer,1023,uniform,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-25,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7307288580309983,None,0.0,8,3,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.9446945897887754,0.24288911164801377,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -26,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7327443614487745,None,0.0,17,20,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.007015392562569838,median,quantile_transformer,1314,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -27,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46057831591617715,False,True,hinge,0.04557857428827514,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00027457445401600137,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.48190346970486964,None,0.0,17,18,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -28,weighting,adaboost,SAMME,0.01015775085651925,8,478,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018006821527918774,median,robust_scaler,,,0.75,0.25,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.983674258036966,True,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-29,weighting,extra_trees,,,,,,,,,,,,,,,False,entropy,None,0.2897695525330714,None,0.0,1,18,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,False,True,1,squared_hinge,ovr,l1,5.8210471080919316e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -30,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.6993161849181185,0.44235005157802176,auto,255,None,270,15,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0077758033214372,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -31,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.34085742012558995,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010251558508210521,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,94.77374433257484,chi2,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -32,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23.617163356857034,0.4249652342218557,2,0.03426568422270486,poly,-1,True,0.0003809897288698571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004949301030421484,most_frequent,quantile_transformer,847,normal,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,3.3404000226016595,False,True,1,squared_hinge,ovr,l1,0.00013812954117187317,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-33,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4751607001217574,None,0.0,2,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0015489667569464098,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2465775971025667,fpr,chi2,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -34,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.787172957129578,0.23076913534674612,auto,255,None,8,4,10,loss,1E-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002842817334543296,mean,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2779207466036798,fwe,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -35,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,158.8949863228855,,,0.03920054687126197,rbf,-1,True,0.05469985785058926,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.015996674733825135,most_frequent,quantile_transformer,1033,uniform,,,fast_ica,,,,,,,,,,,parallel,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -36,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.1161756717784211,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-37,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,114.37037237306517,,,0.013196149743002957,rbf,-1,False,2.2119982336561568e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.030600209348810598,median,robust_scaler,,,0.8903774541072713,0.14849508114407797,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10727720089253716,fwe,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -38,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25369.899739311986,-0.20538081740449166,,0.007550793530761754,sigmoid,-1,True,0.00014198788135109906,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009250691729522439,most_frequent,quantile_transformer,1442,uniform,,,fast_ica,,,,,,,,,,,parallel,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -39,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.742074481485891,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010388289410086769,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -40,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0038325481818368653,most_frequent,quantile_transformer,1000,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-41,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46057831591617715,False,True,hinge,0.04557857428827514,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00027457445401600137,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.48190346970486964,None,0.0,17,18,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -42,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00012339000686260981,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,mean,standardize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.027161884929113287,3011,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -43,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1007.8868860667042,0.7073639177519475,2,0.0009693320195457126,poly,-1,True,0.00048384544670559135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.017078985265493323,median,quantile_transformer,971,uniform,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27854669854596986,fpr,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -44,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0009039383509168851,0.013859624893482336,auto,255,None,314,166,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07166826832005445,median,robust_scaler,,,0.8113117119932765,0.22229745700501014,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-45,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.5916641238089724,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.014941875096420176,most_frequent,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.6621674571394228,None,0.0,3,11,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -46,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.4177635558897493,True,True,hinge,0.00036622547004230247,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.3298639925115399,median,normalize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.02443001336430177,7802,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -47,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.576840761438448e-07,True,,0.0003087686113414944,True,1.0895900532824292e-07,constant,hinge,elasticnet,,0.007781223173502778,no_encoding,minority_coalescer,0.002482961497851837,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -48,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.6149200141024044,None,0.0,3,6,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27403124544524843,fdr,chi2,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-49,weighting,adaboost,SAMME,1.1345415570152533,6,472,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,89.55941314463736,f_classif,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -50,none,adaboost,SAMME,1.117891964153124,7,350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.099090775365223,mean,standardize,,,,,fast_ica,,,,,,,,,,,parallel,exp,1122,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -51,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,1.260108334347015e-07,0.020450900578038868,auto,255,None,1907,82,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,quantile_transformer,1015,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.9704315246405552,True,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -52,none,adaboost,SAMME,0.011233995624432622,9,477,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018370622484682127,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6039710338898471,False,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -53,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.09778930778910033,None,0.0,7,16,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006832094190599457,median,robust_scaler,,,0.942955932669814,0.23893530390938889,feature_agglomeration,,,,,,,,,,,,,,,cosine,complete,178,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-54,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1173.0906096449921,0.18922666901484564,3,0.0022927600388323774,poly,-1,True,1.86623690497442e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,complete,306,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -55,none,bernoulli_nb,,,,,0.07695200771694985,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0003825010764625397,median,minmax,,,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,cosine,9121,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -56,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.35183637194483053,None,0.0,1,16,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -57,weighting,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.5337657666025502,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.2675503466579934,median,normalize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -58,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.43183995003940995,None,0.0,1,10,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.028164291368553036,mean,standardize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-59,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.38845176895497546,True,True,hinge,0.07195442121939964,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009803171174126721,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,True,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -60,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.19548169161642792,None,0.0,10,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0031030790458014663,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,96.55453782974163,f_classif,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -61,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4077.0183557137793,,,0.18582946048483806,rbf,-1,True,0.007982841167341137,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0024292204383546253,mean,robust_scaler,,,0.7925685994397953,0.28082571006541873,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,15,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -62,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.02145872972690199,0.03842927840160621,auto,255,None,171,8,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.004228524718610471,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.13472853186492292,fdr,chi2,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-63,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.35459002631952014,None,0.0,5,15,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010270173676218672,median,quantile_transformer,1807,uniform,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,56.525707700661215,chi2,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -64,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10091.529330032845,,,0.0011283303013784186,rbf,-1,True,0.006930076959856067,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -65,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.7553798077390236e-05,False,,,True,0.5295119133805599,optimal,log,elasticnet,,0.0002846848503288152,no_encoding,minority_coalescer,0.05377825070455988,mean,quantile_transformer,1591,normal,,,fast_ica,,,,,,,,,,,deflation,cube,1400,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -66,weighting,adaboost,SAMME,0.433235479452316,10,496,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.22971812945532427,mean,quantile_transformer,1000,normal,,,extra_trees_preproc_for_classification,False,gini,None,0.7947988012814284,None,0.0,18,2,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-67,weighting,extra_trees,,,,,,,,,,,,,,,False,entropy,None,0.6706605040553315,None,0.0,9,6,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -68,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5838261599040107,None,0.0,1,12,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.02303141194502663,most_frequent,normalize,,,,,extra_trees_preproc_for_classification,False,gini,None,0.037852255328846424,None,0.0,1,10,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -69,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10979683793960729,False,True,1,squared_hinge,ovr,l2,0.026948804550238153,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0023169684511818865,median,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -70,none,gaussian_nb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010079280155090884,median,quantile_transformer,1000,uniform,,,fast_ica,,,,,,,,,,,deflation,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -71,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,8.871723288059728e-10,0.08842201394778045,auto,255,None,10,29,17,loss,1E-07,0.0892491697716671,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.011271569596841927,most_frequent,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-72,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,None,,0.09865556043445883,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.005823759089880865,most_frequent,robust_scaler,,,0.7978120173580654,0.2811219694282669,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.17624345903922523,2,0.0009546886303843381,poly,8610,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -73,none,adaboost,SAMME,0.011233995624432622,9,477,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018370622484682127,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6039710338898471,False,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -74,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.16962127437318736,None,0.0,6,19,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.7344178072071488,0.2612353681753729,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -75,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16.135683787075433,0.2634264231167345,3,0.18827113652741964,poly,-1,False,0.0002553726933201338,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0006724292611836027,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.969882360922218,False,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-76,none,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.0013936557152472336,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.11975636073546837,median,quantile_transformer,1211,normal,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6358887997528,5,0.003986693030872811,poly,1128,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -77,weighting,extra_trees,,,,,,,,,,,,,,,False,gini,None,0.6662795762050231,None,0.0,2,13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.014575553627820731,median,none,,,,,extra_trees_preproc_for_classification,False,gini,None,0.11161311461671355,None,0.0,1,12,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -78,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.04375141824118664,None,0.0,1,12,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.023807358529040604,median,robust_scaler,,,0.75,0.25,feature_agglomeration,,,,,,,,,,,,,,,euclidean,average,25,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -79,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22226.18914462097,False,True,1,squared_hinge,ovr,l2,0.0011791055409702416,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,mean,minmax,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.98695754599322,mutual_info,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-80,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.00833872532969829,0.012200837951703469,auto,255,None,10,6,7,loss,1E-07,0.06919768711524409,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.01993990998225036,median,robust_scaler,,,0.7362122580857475,0.2308155422699841,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,23.392736320751,False,True,1,squared_hinge,ovr,l1,2.6042719275400663e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -81,weighting,extra_trees,,,,,,,,,,,,,,,True,entropy,None,0.8939382130524546,None,0.0,19,16,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.03124006779440345,most_frequent,quantile_transformer,1043,uniform,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50.47795832236559,f_classif,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -82,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7477273064743348,None,0.0,1,11,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.044336192142984635,most_frequent,quantile_transformer,946,uniform,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,214,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -83,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.00010691026378585387,0.03064187005324359,auto,255,None,12,1,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,quantile_transformer,1859,normal,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,151,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-84,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.599213541504855e-05,False,True,squared_hinge,0.06784385222631517,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.5061717386466865,None,0.0,15,2,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -85,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -86,weighting,gaussian_nb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.8954533564156945,0.22543185910651606,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -87,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00014964522844486457,True,True,hinge,0.0006431519230826446,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0001638512484423687,most_frequent,quantile_transformer,108,normal,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,90.1579424941942,chi2,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-88,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25.969569717137436,0.49624326950325637,2,0.029383790525059468,poly,-1,False,0.002714764732764232,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.4371533082952412,mean,robust_scaler,,,0.8231337422796986,0.26700254382353833,fast_ica,,,,,,,,,,,parallel,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -89,weighting,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07578664472529394,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012596384519267407,median,robust_scaler,,,0.7176883035814098,0.2870577047962274,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,0.6428686651502072,False,True,1,squared_hinge,ovr,l1,2.5546943595340656e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -90,weighting,adaboost,SAMME,0.07959216314142419,1,124,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,robust_scaler,,,0.7401836136931198,0.2679472228039613,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18.787733861356816,mutual_info,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -91,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.0819425045156221,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.37737820096945385,median,minmax,,,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.5073162154954842,2,0.004207352122999392,poly,1212,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-92,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7229728445103076,None,0.0,5,13,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7823020129596692,0.1205596141179452,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.12983623180397538,fwe,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -93,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7982423863663426,None,0.0,11,7,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.003645478141655197,median,robust_scaler,,,0.7651599230489026,0.15388614105871848,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,34,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -94,none,adaboost,SAMME,0.04534487012126666,9,237,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.009377862051712454,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,4.281022361344507,False,True,1,squared_hinge,ovr,l1,0.02040524760798526,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -95,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -96,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.5259488669933622,None,0.0,1,11,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-97,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -98,weighting,adaboost,SAMME,0.24826166093503962,4,203,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.011447514256202326,median,quantile_transformer,949,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.7702718499065888,True,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -99,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,5.295700573535198e-10,0.042756254512807394,auto,255,None,84,14,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.35207536432313746,median,robust_scaler,,,0.7792676238311911,0.28901203457977576,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,248,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -100,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0016445078304079647,0.1997863062244349,auto,255,None,564,15,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.75,0.23248947228355937,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.32975511665416357,fdr,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-101,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0019618741335452826,True,True,hinge,0.0004803120822404903,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,57.38056668131513,mutual_info,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -102,weighting,adaboost,SAMME.R,0.0190998863782481,7,99,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,quantile_transformer,1177,uniform,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,7,None,1,3,1.0,97,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -103,weighting,extra_trees,,,,,,,,,,,,,,,False,gini,None,0.8850157429082246,None,0.0,12,9,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.27673478870889345,median,none,,,,,fast_ica,,,,,,,,,,,deflation,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -104,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -105,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-106,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.527652780264615,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010739139664277112,mean,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -107,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6568118863259279,None,0.0,4,2,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,normalize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,average,23,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -108,none,k_nearest_neighbors,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2,distance,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,minmax,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,86.72140178707858,f_classif,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -109,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -110,weighting,adaboost,SAMME.R,0.22665749778830807,7,78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,quantile_transformer,1000,uniform,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,complete,373,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-111,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1689.0860195745497,0.7404917548199534,2,0.011884114654356123,poly,-1,True,4.262566522678876e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004395163582476699,median,minmax,,,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,3,None,13,13,1.0,74,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -112,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.859589328406253e-07,0.25392293346701533,auto,255,None,4,80,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,65.7135135608214,mutual_info,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -113,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,9.097196057095871e-06,0.32800101253288033,auto,255,None,53,28,20,loss,1E-07,0.101919468281566,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008121631984215255,median,robust_scaler,,,0.7654960296398138,0.25,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07288328235161678,fwe,chi2,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -114,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.5463209559127865,None,0.0,12,5,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.025636105021492692,mean,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.4472785394247571,False,True,1,squared_hinge,ovr,l1,0.00018809455411335498,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-115,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.07970183198340376,None,0.0,9,9,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.008729901092151533,most_frequent,quantile_transformer,1028,normal,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -116,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.050858257794119e-10,0.0509713008465305,auto,255,None,17,16,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006772048110168395,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.42719682445182733,fdr,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -117,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4886932005592788,None,0.0,1,17,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,2.2673178962517726,False,True,1,squared_hinge,ovr,l1,0.07576775715726437,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -118,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1954.4001293172055,0.5941977727413141,3,0.01222672837922025,poly,-1,False,0.000868704184075337,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.023611476558497053,most_frequent,quantile_transformer,1312,normal,,,extra_trees_preproc_for_classification,True,entropy,None,0.26038719206370126,None,0.0,6,14,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-119,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.48772464140872207,None,0.0,1,16,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010000000000000004,most_frequent,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -120,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,571.8976418358935,-0.6542106402522795,,5.0850539598583375e-05,sigmoid,-1,False,0.003954814208041632,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1378799965815952,fwe,f_classif,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -121,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6204291847226782,None,0.0,2,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012802264108301202,most_frequent,normalize,,,,,fast_ica,,,,,,,,,,,deflation,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -122,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.7159488281157247,None,0.0,15,3,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7546151696972261,0.25941712940346606,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,33,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -123,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.4285190453868457,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.49851517731857553,most_frequent,quantile_transformer,958,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
-124,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.690301155272815,None,0.0,19,7,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.00018373331741554494,most_frequent,normalize,,,,,extra_trees_preproc_for_classification,False,gini,None,0.5638614505566816,None,0.0,1,7,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -125,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -126,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.77554422882638,True,True,hinge,0.02071347009354619,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.01999967026715386,most_frequent,minmax,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.011708181260754043,8770,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -127,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,2.5396632902741157e-05,0.18584665494894462,auto,255,None,6,48,13,loss,1E-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,complete,379,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 -128,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,921.894056649221,,,0.4428468266254544,rbf,-1,False,1.0295003364004332e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,robust_scaler,,,0.7588611364765459,0.25,fast_ica,,,,,,,,,,,deflation,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type,100,bag_of_word_encoding,1,min_df_absolute,,,,,,,,,1 
+idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,classifier:adaboost:learning_rate,classifier:adaboost:max_depth,classifier:adaboost:n_estimators,classifier:bernoulli_nb:alpha,classifier:bernoulli_nb:fit_prior,classifier:decision_tree:criterion,classifier:decision_tree:max_depth_factor,classifier:decision_tree:max_features,classifier:decision_tree:max_leaf_nodes,classifier:decision_tree:min_impurity_decrease,classifier:decision_tree:min_samples_leaf,classifier:decision_tree:min_samples_split,classifier:decision_tree:min_weight_fraction_leaf,classifier:extra_trees:bootstrap,classifier:extra_trees:criterion,classifier:extra_trees:max_depth,classifier:extra_trees:max_features,classifier:extra_trees:max_leaf_nodes,classifier:extra_trees:min_impurity_decrease,classifier:extra_trees:min_samples_leaf,classifier:extra_trees:min_samples_split,classifier:extra_trees:min_weight_fraction_leaf,classifier:gradient_boosting:early_stop,classifier:gradient_boosting:l2_regularization,classifier:gradient_boosting:learning_rate,classifier:gradient_boosting:loss,classifier:gradient_boosting:max_bins,classifier:gradient_boosting:max_depth,classifier:gradient_boosting:max_leaf_nodes,classifier:gradient_boosting:min_samples_leaf,classifier:gradient_boosting:n_iter_no_change,classifier:gradient_boosting:scoring,classifier:gradient_boosting:tol,classifier:gradient_boosting:validation_fraction,classifier:k_nearest_neighbors:n_neighbors,classifier:k_nearest_neighbors:p,classifier:k_nearest_neighbors:weights,classifier:lda:shrinkage,classifier:lda:shrinkage_factor,classifier:lda:tol,classifier:liblinear_svc:C,classifier:liblinear_svc:dual,classifier:liblinear_svc:fit_intercept,classifier:liblinear_svc:intercept_scaling,classifier:liblinear_svc:loss,classifier:liblinear_svc:multi_class,classifier:liblinear_svc:penalty,classifier:liblinear_svc:tol,classifier:libsvm_svc:C,classifier:libsvm_svc:coef0,classifier:libsvm_svc:degree,classifier:libsvm_svc:gamma,classifier:libs
vm_svc:kernel,classifier:libsvm_svc:max_iter,classifier:libsvm_svc:shrinking,classifier:libsvm_svc:tol,classifier:multinomial_nb:alpha,classifier:multinomial_nb:fit_prior,classifier:passive_aggressive:C,classifier:passive_aggressive:average,classifier:passive_aggressive:fit_intercept,classifier:passive_aggressive:loss,classifier:passive_aggressive:tol,classifier:qda:reg_param,classifier:random_forest:bootstrap,classifier:random_forest:criterion,classifier:random_forest:max_depth,classifier:random_forest:max_features,classifier:random_forest:max_leaf_nodes,classifier:random_forest:min_impurity_decrease,classifier:random_forest:min_samples_leaf,classifier:random_forest:min_samples_split,classifier:random_forest:min_weight_fraction_leaf,classifier:sgd:alpha,classifier:sgd:average,classifier:sgd:epsilon,classifier:sgd:eta0,classifier:sgd:fit_intercept,classifier:sgd:l1_ratio,classifier:sgd:learning_rate,classifier:sgd:loss,classifier:sgd:penalty,classifier:sgd:power_t,classifier:sgd:tol,data_preprocessor:feature_type:categorical_transformer:categorical_encoding:__choice__,data_preprocessor:feature_type:categorical_transformer:category_coalescence:__choice__,data_preprocessor:feature_type:categorical_transformer:category_coalescence:minority_coalescer:minimum_fraction,data_preprocessor:feature_type:numerical_transformer:imputation:strategy,data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__,data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles,data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution,data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_max,data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_min,feature_preprocessor:__choice__,feature_preprocessor:extra_trees_preproc_for_classification:bootstrap,feature_preprocessor:extra_trees_preproc_for_classification:criterion,feature_preprocessor:extra_tree
s_preproc_for_classification:max_depth,feature_preprocessor:extra_trees_preproc_for_classification:max_features,feature_preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes,feature_preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_leaf,feature_preprocessor:extra_trees_preproc_for_classification:min_samples_split,feature_preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf,feature_preprocessor:extra_trees_preproc_for_classification:n_estimators,feature_preprocessor:fast_ica:algorithm,feature_preprocessor:fast_ica:fun,feature_preprocessor:fast_ica:n_components,feature_preprocessor:fast_ica:whiten,feature_preprocessor:feature_agglomeration:affinity,feature_preprocessor:feature_agglomeration:linkage,feature_preprocessor:feature_agglomeration:n_clusters,feature_preprocessor:feature_agglomeration:pooling_func,feature_preprocessor:kernel_pca:coef0,feature_preprocessor:kernel_pca:degree,feature_preprocessor:kernel_pca:gamma,feature_preprocessor:kernel_pca:kernel,feature_preprocessor:kernel_pca:n_components,feature_preprocessor:kitchen_sinks:gamma,feature_preprocessor:kitchen_sinks:n_components,feature_preprocessor:liblinear_svc_preprocessor:C,feature_preprocessor:liblinear_svc_preprocessor:dual,feature_preprocessor:liblinear_svc_preprocessor:fit_intercept,feature_preprocessor:liblinear_svc_preprocessor:intercept_scaling,feature_preprocessor:liblinear_svc_preprocessor:loss,feature_preprocessor:liblinear_svc_preprocessor:multi_class,feature_preprocessor:liblinear_svc_preprocessor:penalty,feature_preprocessor:liblinear_svc_preprocessor:tol,feature_preprocessor:nystroem_sampler:coef0,feature_preprocessor:nystroem_sampler:degree,feature_preprocessor:nystroem_sampler:gamma,feature_preprocessor:nystroem_sampler:kernel,feature_preprocessor:nystroem_sampler:n_components,feature_preprocessor:pca:keep_variance,feature_preprocessor:pca:whiten,feature_pre
processor:polynomial:degree,feature_preprocessor:polynomial:include_bias,feature_preprocessor:polynomial:interaction_only,feature_preprocessor:random_trees_embedding:bootstrap,feature_preprocessor:random_trees_embedding:max_depth,feature_preprocessor:random_trees_embedding:max_leaf_nodes,feature_preprocessor:random_trees_embedding:min_samples_leaf,feature_preprocessor:random_trees_embedding:min_samples_split,feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf,feature_preprocessor:random_trees_embedding:n_estimators,feature_preprocessor:select_percentile_classification:percentile,feature_preprocessor:select_percentile_classification:score_func,feature_preprocessor:select_rates_classification:alpha,feature_preprocessor:select_rates_classification:mode,feature_preprocessor:select_rates_classification:score_func,data_preprocessor:__choice__ +1,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.006832691101653281,0.0990420448281782,auto,255,None,58,9,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.047607909209835673,most_frequent,normalize,,,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,72,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +2,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.215660250704945e-08,0.0568967527929491,auto,255,None,74,58,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.3999502319254789,fwe,f_classif,feature_type +3,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7811236762098946,None,0.0,15,9,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,extra_trees_preproc_for_classification,False,entropy,None,0.2269858618750471,None,0.0,13,12,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+4,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5804208006044023,None,0.0,5,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.061500733991527654,fdr,f_classif,feature_type +5,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +6,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,683.603209772402,-0.7761786661778607,4,1.0146245161392977,poly,-1,True,0.0004729761062000146,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07556779791699596,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46651479293540027,fdr,f_classif,feature_type +7,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.993647974710288,-0.09714179076410145,2,0.10000000000000006,poly,-1,True,0.0011475566557439987,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.8124421960026027,0.18251138129426106,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,True,,,,,,,,,,,,,feature_type +8,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.31482574716831474,None,0.0,15,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,False,True,1,squared_hinge,ovr,l1,5.5234897124903465e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+9,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.8932965286370729,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +10,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.7447413257016613,None,0.0,1,17,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.35543776610807626,most_frequent,standardize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +11,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1160.8950120742215,0.46804236914612085,3,0.334160333047632,poly,-1,False,2.634522104055978e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.00045750613705912717,median,standardize,,,,,fast_ica,,,,,,,,,,,parallel,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +12,weighting,bernoulli_nb,,,,,8.25565461859145,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,extra_trees_preproc_for_classification,True,gini,None,0.7706131584628054,None,0.0,17,6,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +13,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.8723691924170074,None,0.0,16,4,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.7753213990353444,0.23761031439263935,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.3362359945406512,False,True,1,squared_hinge,ovr,l1,7.811811240118101e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+14,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00039927077813935847,True,,0.0002226431182528295,True,,constant,log,l1,,2.3026724800524452e-05,one_hot_encoding,minority_coalescer,0.0005751185552832477,mean,minmax,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +15,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.591176484799245e-05,True,True,squared_hinge,0.0017562521044689225,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.06807526502411573,median,robust_scaler,,,0.9384996302050278,0.23621961410324244,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,6.83683853040988,False,True,1,squared_hinge,ovr,l1,0.00012606991329428092,,,,,,,,,,,,,,,,,,,,,,,feature_type +16,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6972483216914105,None,0.0,1,19,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.014621392543150346,mean,robust_scaler,,,0.748827539467177,0.25,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,0.8574750868407579,False,True,1,squared_hinge,ovr,l1,0.00010000000000000009,,,,,,,,,,,,,,,,,,,,,,,feature_type +17,none,adaboost,SAMME,0.07484110574206897,4,344,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.12052203362859185,most_frequent,minmax,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6036566748728293,False,,,,,,,,,,,,,,,,feature_type +18,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+19,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.24381781930497387,None,0.0,2,3,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0031003006443551303,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +20,weighting,adaboost,SAMME,0.8984989505050309,8,296,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.11018798131108497,median,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,86.11171751703243,mutual_info,,,,feature_type +21,weighting,extra_trees,,,,,,,,,,,,,,,True,gini,None,0.5634323443830136,None,0.0,6,13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.010000000000000004,mean,quantile_transformer,1046,normal,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +22,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4158372160789875,None,0.0,2,14,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,robust_scaler,,,0.7861547674423665,0.2590888834784356,extra_trees_preproc_for_classification,True,gini,None,0.3820313482020813,None,0.0,1,16,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +23,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.6996707221595181,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00018281165725763588,mean,standardize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,ward,22,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+24,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.14699663235614766,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010362166964374616,most_frequent,quantile_transformer,1023,uniform,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +25,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7307288580309983,None,0.0,8,3,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.9446945897887754,0.24288911164801377,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +26,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7327443614487745,None,0.0,17,20,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.007015392562569838,median,quantile_transformer,1314,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type +27,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46057831591617715,False,True,hinge,0.04557857428827514,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00027457445401600137,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.48190346970486964,None,0.0,17,18,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +28,weighting,adaboost,SAMME,0.01015775085651925,8,478,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018006821527918774,median,robust_scaler,,,0.75,0.25,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.983674258036966,True,,,,,,,,,,,,,,,,feature_type 
+29,weighting,extra_trees,,,,,,,,,,,,,,,False,entropy,None,0.2897695525330714,None,0.0,1,18,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,False,True,1,squared_hinge,ovr,l1,5.8210471080919316e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type +30,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.6993161849181185,0.44235005157802176,auto,255,None,270,15,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0077758033214372,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +31,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.34085742012558995,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010251558508210521,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,94.77374433257484,chi2,,,,feature_type +32,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23.617163356857034,0.4249652342218557,2,0.03426568422270486,poly,-1,True,0.0003809897288698571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004949301030421484,most_frequent,quantile_transformer,847,normal,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,3.3404000226016595,False,True,1,squared_hinge,ovr,l1,0.00013812954117187317,,,,,,,,,,,,,,,,,,,,,,,feature_type +33,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4751607001217574,None,0.0,2,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0015489667569464098,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2465775971025667,fpr,chi2,feature_type 
+34,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.787172957129578,0.23076913534674612,auto,255,None,8,4,10,loss,1E-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002842817334543296,mean,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2779207466036798,fwe,f_classif,feature_type +35,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,158.8949863228855,,,0.03920054687126197,rbf,-1,True,0.05469985785058926,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.015996674733825135,most_frequent,quantile_transformer,1033,uniform,,,fast_ica,,,,,,,,,,,parallel,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +36,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.1161756717784211,None,0.0,2,2,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +37,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,114.37037237306517,,,0.013196149743002957,rbf,-1,False,2.2119982336561568e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.030600209348810598,median,robust_scaler,,,0.8903774541072713,0.14849508114407797,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10727720089253716,fwe,f_classif,feature_type +38,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25369.899739311986,-0.20538081740449166,,0.007550793530761754,sigmoid,-1,True,0.00014198788135109906,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009250691729522439,most_frequent,quantile_transformer,1442,uniform,,,fast_ica,,,,,,,,,,,parallel,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+39,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.742074481485891,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010388289410086769,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +40,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0038325481818368653,most_frequent,quantile_transformer,1000,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +41,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.46057831591617715,False,True,hinge,0.04557857428827514,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.00027457445401600137,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.48190346970486964,None,0.0,17,18,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +42,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00012339000686260981,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,mean,standardize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.027161884929113287,3011,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +43,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1007.8868860667042,0.7073639177519475,2,0.0009693320195457126,poly,-1,True,0.00048384544670559135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.017078985265493323,median,quantile_transformer,971,uniform,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27854669854596986,fpr,f_classif,feature_type 
+44,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0009039383509168851,0.013859624893482336,auto,255,None,314,166,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.07166826832005445,median,robust_scaler,,,0.8113117119932765,0.22229745700501014,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +45,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.5916641238089724,None,0.0,1,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.014941875096420176,most_frequent,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.6621674571394228,None,0.0,3,11,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +46,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.4177635558897493,True,True,hinge,0.00036622547004230247,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.3298639925115399,median,normalize,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.02443001336430177,7802,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +47,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.576840761438448e-07,True,,0.0003087686113414944,True,1.0895900532824292e-07,constant,hinge,elasticnet,,0.007781223173502778,no_encoding,minority_coalescer,0.002482961497851837,mean,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +48,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.6149200141024044,None,0.0,3,6,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,normalize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.27403124544524843,fdr,chi2,feature_type 
+49,weighting,adaboost,SAMME,1.1345415570152533,6,472,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,89.55941314463736,f_classif,,,,feature_type +50,none,adaboost,SAMME,1.117891964153124,7,350,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.099090775365223,mean,standardize,,,,,fast_ica,,,,,,,,,,,parallel,exp,1122,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +51,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,1.260108334347015e-07,0.020450900578038868,auto,255,None,1907,82,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,quantile_transformer,1015,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.9704315246405552,True,,,,,,,,,,,,,,,,feature_type +52,none,adaboost,SAMME,0.011233995624432622,9,477,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018370622484682127,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6039710338898471,False,,,,,,,,,,,,,,,,feature_type +53,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.09778930778910033,None,0.0,7,16,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006832094190599457,median,robust_scaler,,,0.942955932669814,0.23893530390938889,feature_agglomeration,,,,,,,,,,,,,,,cosine,complete,178,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+54,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1173.0906096449921,0.18922666901484564,3,0.0022927600388323774,poly,-1,True,1.86623690497442e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,complete,306,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +55,none,bernoulli_nb,,,,,0.07695200771694985,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0003825010764625397,median,minmax,,,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,cosine,9121,,,,,,,,,,,,,,,,,,feature_type +56,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.35183637194483053,None,0.0,1,16,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,False,,,,,,,,,,,,,feature_type +57,weighting,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.5337657666025502,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.2675503466579934,median,normalize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +58,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.43183995003940995,None,0.0,1,10,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.028164291368553036,mean,standardize,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +59,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.38845176895497546,True,True,hinge,0.07195442121939964,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.009803171174126721,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,True,False,,,,,,,,,,,,,feature_type 
+60,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.19548169161642792,None,0.0,10,18,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.0031030790458014663,most_frequent,normalize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,96.55453782974163,f_classif,,,,feature_type +61,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4077.0183557137793,,,0.18582946048483806,rbf,-1,True,0.007982841167341137,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0024292204383546253,mean,robust_scaler,,,0.7925685994397953,0.28082571006541873,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,15,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +62,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.02145872972690199,0.03842927840160621,auto,255,None,171,8,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.004228524718610471,most_frequent,standardize,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.13472853186492292,fdr,chi2,feature_type +63,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.35459002631952014,None,0.0,5,15,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010270173676218672,median,quantile_transformer,1807,uniform,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,56.525707700661215,chi2,,,,feature_type +64,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10091.529330032845,,,0.0011283303013784186,rbf,-1,True,0.006930076959856067,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type 
+65,weighting,sgd,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.7553798077390236e-05,False,,,True,0.5295119133805599,optimal,log,elasticnet,,0.0002846848503288152,no_encoding,minority_coalescer,0.05377825070455988,mean,quantile_transformer,1591,normal,,,fast_ica,,,,,,,,,,,deflation,cube,1400,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +66,weighting,adaboost,SAMME,0.433235479452316,10,496,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.22971812945532427,mean,quantile_transformer,1000,normal,,,extra_trees_preproc_for_classification,False,gini,None,0.7947988012814284,None,0.0,18,2,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +67,weighting,extra_trees,,,,,,,,,,,,,,,False,entropy,None,0.6706605040553315,None,0.0,9,6,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type +68,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.5838261599040107,None,0.0,1,12,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.02303141194502663,most_frequent,normalize,,,,,extra_trees_preproc_for_classification,False,gini,None,0.037852255328846424,None,0.0,1,10,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +69,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.10979683793960729,False,True,1,squared_hinge,ovr,l2,0.026948804550238153,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0023169684511818865,median,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,False,,,,,,,,,,,,,feature_type 
+70,none,gaussian_nb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010079280155090884,median,quantile_transformer,1000,uniform,,,fast_ica,,,,,,,,,,,deflation,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +71,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,8.871723288059728e-10,0.08842201394778045,auto,255,None,10,29,17,loss,1E-07,0.0892491697716671,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.011271569596841927,most_frequent,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type +72,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,None,,0.09865556043445883,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.005823759089880865,most_frequent,robust_scaler,,,0.7978120173580654,0.2811219694282669,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.17624345903922523,2,0.0009546886303843381,poly,8610,,,,,,,,,,,,,,,,,,feature_type +73,none,adaboost,SAMME,0.011233995624432622,9,477,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.018370622484682127,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6039710338898471,False,,,,,,,,,,,,,,,,feature_type +74,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.16962127437318736,None,0.0,6,19,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.7344178072071488,0.2612353681753729,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type 
+75,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16.135683787075433,0.2634264231167345,3,0.18827113652741964,poly,-1,False,0.0002553726933201338,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0006724292611836027,mean,standardize,,,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.969882360922218,False,,,,,,,,,,,,,,,,feature_type +76,none,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.0013936557152472336,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.11975636073546837,median,quantile_transformer,1211,normal,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6358887997528,5,0.003986693030872811,poly,1128,,,,,,,,,,,,,,,,,,feature_type +77,weighting,extra_trees,,,,,,,,,,,,,,,False,gini,None,0.6662795762050231,None,0.0,2,13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.014575553627820731,median,none,,,,,extra_trees_preproc_for_classification,False,gini,None,0.11161311461671355,None,0.0,1,12,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +78,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.04375141824118664,None,0.0,1,12,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.023807358529040604,median,robust_scaler,,,0.75,0.25,feature_agglomeration,,,,,,,,,,,,,,,euclidean,average,25,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +79,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22226.18914462097,False,True,1,squared_hinge,ovr,l2,0.0011791055409702416,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,mean,minmax,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,59.98695754599322,mutual_info,,,,feature_type 
+80,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,0.00833872532969829,0.012200837951703469,auto,255,None,10,6,7,loss,1E-07,0.06919768711524409,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.01993990998225036,median,robust_scaler,,,0.7362122580857475,0.2308155422699841,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,23.392736320751,False,True,1,squared_hinge,ovr,l1,2.6042719275400663e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type +81,weighting,extra_trees,,,,,,,,,,,,,,,True,entropy,None,0.8939382130524546,None,0.0,19,16,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.03124006779440345,most_frequent,quantile_transformer,1043,uniform,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50.47795832236559,f_classif,,,,feature_type +82,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.7477273064743348,None,0.0,1,11,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.044336192142984635,most_frequent,quantile_transformer,946,uniform,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,214,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +83,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.00010691026378585387,0.03064187005324359,auto,255,None,12,1,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,quantile_transformer,1859,normal,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,151,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +84,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.599213541504855e-05,False,True,squared_hinge,0.06784385222631517,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,extra_trees_preproc_for_classification,True,gini,None,0.5061717386466865,None,0.0,15,2,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+85,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +86,weighting,gaussian_nb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.8954533564156945,0.22543185910651606,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +87,none,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.00014964522844486457,True,True,hinge,0.0006431519230826446,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.0001638512484423687,most_frequent,quantile_transformer,108,normal,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,90.1579424941942,chi2,,,,feature_type +88,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25.969569717137436,0.49624326950325637,2,0.029383790525059468,poly,-1,False,0.002714764732764232,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.4371533082952412,mean,robust_scaler,,,0.8231337422796986,0.26700254382353833,fast_ica,,,,,,,,,,,parallel,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +89,weighting,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07578664472529394,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012596384519267407,median,robust_scaler,,,0.7176883035814098,0.2870577047962274,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,0.6428686651502072,False,True,1,squared_hinge,ovr,l1,2.5546943595340656e-05,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+90,weighting,adaboost,SAMME,0.07959216314142419,1,124,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,robust_scaler,,,0.7401836136931198,0.2679472228039613,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18.787733861356816,mutual_info,,,,feature_type +91,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.0819425045156221,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.37737820096945385,median,minmax,,,,,nystroem_sampler,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.5073162154954842,2,0.004207352122999392,poly,1212,,,,,,,,,,,,,,,,,,feature_type +92,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7229728445103076,None,0.0,5,13,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7823020129596692,0.1205596141179452,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.12983623180397538,fwe,f_classif,feature_type +93,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.7982423863663426,None,0.0,11,7,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.003645478141655197,median,robust_scaler,,,0.7651599230489026,0.15388614105871848,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,34,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +94,none,adaboost,SAMME,0.04534487012126666,9,237,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.009377862051712454,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,4.281022361344507,False,True,1,squared_hinge,ovr,l1,0.02040524760798526,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+95,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +96,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.5259488669933622,None,0.0,1,11,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,median,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +97,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +98,weighting,adaboost,SAMME,0.24826166093503962,4,203,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.011447514256202326,median,quantile_transformer,949,normal,,,pca,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.7702718499065888,True,,,,,,,,,,,,,,,,feature_type +99,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,5.295700573535198e-10,0.042756254512807394,auto,255,None,84,14,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.35207536432313746,median,robust_scaler,,,0.7792676238311911,0.28901203457977576,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,248,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +100,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,0.0016445078304079647,0.1997863062244349,auto,255,None,564,15,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,robust_scaler,,,0.75,0.23248947228355937,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.32975511665416357,fdr,f_classif,feature_type 
+101,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0019618741335452826,True,True,hinge,0.0004803120822404903,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,57.38056668131513,mutual_info,,,,feature_type +102,weighting,adaboost,SAMME.R,0.0190998863782481,7,99,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,most_frequent,quantile_transformer,1177,uniform,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,7,None,1,3,1.0,97,,,,,,feature_type +103,weighting,extra_trees,,,,,,,,,,,,,,,False,gini,None,0.8850157429082246,None,0.0,12,9,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.27673478870889345,median,none,,,,,fast_ica,,,,,,,,,,,deflation,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +104,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +105,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +106,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.527652780264615,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010739139664277112,mean,standardize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,False,True,,,,,,,,,,,,,feature_type 
+107,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6568118863259279,None,0.0,4,2,0.0,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,normalize,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,average,23,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +108,none,k_nearest_neighbors,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2,distance,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,minmax,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,86.72140178707858,f_classif,,,,feature_type +109,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +110,weighting,adaboost,SAMME.R,0.22665749778830807,7,78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,quantile_transformer,1000,uniform,,,feature_agglomeration,,,,,,,,,,,,,,,cosine,complete,373,median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +111,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1689.0860195745497,0.7404917548199534,2,0.011884114654356123,poly,-1,True,4.262566522678876e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004395163582476699,median,minmax,,,,,random_trees_embedding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,3,None,13,13,1.0,74,,,,,,feature_type +112,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.859589328406253e-07,0.25392293346701533,auto,255,None,4,80,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,standardize,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,65.7135135608214,mutual_info,,,,feature_type 
+113,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,9.097196057095871e-06,0.32800101253288033,auto,255,None,53,28,20,loss,1E-07,0.101919468281566,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008121631984215255,median,robust_scaler,,,0.7654960296398138,0.25,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.07288328235161678,fwe,chi2,feature_type +114,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.5463209559127865,None,0.0,12,5,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.025636105021492692,mean,standardize,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,1.4472785394247571,False,True,1,squared_hinge,ovr,l1,0.00018809455411335498,,,,,,,,,,,,,,,,,,,,,,,feature_type +115,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.07970183198340376,None,0.0,9,9,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.008729901092151533,most_frequent,quantile_transformer,1028,normal,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,True,True,,,,,,,,,,,,,feature_type +116,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,off,2.050858257794119e-10,0.0509713008465305,auto,255,None,17,16,,loss,1E-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.0006772048110168395,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.42719682445182733,fdr,f_classif,feature_type +117,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.4886932005592788,None,0.0,1,17,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,median,minmax,,,,,liblinear_svc_preprocessor,,,,,,,,,,,,,,,,,,,,,,,,,,2.2673178962517726,False,True,1,squared_hinge,ovr,l1,0.07576775715726437,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+118,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1954.4001293172055,0.5941977727413141,3,0.01222672837922025,poly,-1,False,0.000868704184075337,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.023611476558497053,most_frequent,quantile_transformer,1312,normal,,,extra_trees_preproc_for_classification,True,entropy,None,0.26038719206370126,None,0.0,6,14,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +119,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,gini,None,0.48772464140872207,None,0.0,1,16,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.010000000000000004,most_frequent,normalize,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +120,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,571.8976418358935,-0.6542106402522795,,5.0850539598583375e-05,sigmoid,-1,False,0.003954814208041632,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,median,none,,,,,select_rates_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1378799965815952,fwe,f_classif,feature_type +121,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,entropy,None,0.6204291847226782,None,0.0,2,7,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.012802264108301202,most_frequent,normalize,,,,,fast_ica,,,,,,,,,,,deflation,exp,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +122,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,gini,None,0.7159488281157247,None,0.0,15,3,0.0,,,,,,,,,,,,no_encoding,no_coalescense,,mean,robust_scaler,,,0.7546151696972261,0.25941712940346606,feature_agglomeration,,,,,,,,,,,,,,,cosine,average,33,mean,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+123,none,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.4285190453868457,None,0.0,1,2,0.0,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.49851517731857553,most_frequent,quantile_transformer,958,uniform,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,False,False,,,,,,,,,,,,,feature_type +124,weighting,random_forest,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,entropy,None,0.690301155272815,None,0.0,19,7,0.0,,,,,,,,,,,,no_encoding,minority_coalescer,0.00018373331741554494,most_frequent,normalize,,,,,extra_trees_preproc_for_classification,False,gini,None,0.5638614505566816,None,0.0,1,7,0.0,100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +125,none,qda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.563056219822946,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.32793677336996485,most_frequent,none,,,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +126,weighting,passive_aggressive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.77554422882638,True,True,hinge,0.02071347009354619,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.01999967026715386,most_frequent,minmax,,,,,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.011708181260754043,8770,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type +127,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,valid,2.5396632902741157e-05,0.18584665494894462,auto,255,None,6,48,13,loss,1E-07,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,none,,,,,feature_agglomeration,,,,,,,,,,,,,,,euclidean,complete,379,max,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type 
+128,weighting,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,921.894056649221,,,0.4428468266254544,rbf,-1,False,1.0295003364004332e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,no_coalescense,,mean,robust_scaler,,,0.7588611364765459,0.25,fast_ica,,,,,,,,,,,deflation,logcosh,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,feature_type diff --git a/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py index 1e08805d87..6bb822a9ee 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py @@ -17,24 +17,30 @@ def test_metalearning_cs_size(self): data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) + # Total: 176, categorical: 3, numerical: 7, string: 7 + total = 179 + num_numerical = 6 + num_string = 11 + num_categorical = 3 for feat_type, cs_size in [ - ({"A": "numerical"}, 165), - ({"A": "categorical"}, 162), - ({"A": "string"}, 174), - ({"A": "numerical", "B": "categorical"}, 168), - ({"A": "numerical", "B": "string"}, 180), - ({"A": "categorical", "B": "string"}, 177), - ({"A": "categorical", "B": "string", "C": "numerical"}, 183), + ({"A": "numerical"}, total - num_string - num_categorical), + ({"A": "categorical"}, total - num_string - num_numerical), + ({"A": "string"}, total - num_categorical - num_numerical), + ({"A": "numerical", "B": "categorical"}, total - num_string), + ({"A": "numerical", "B": "string"}, total - num_categorical), + ({"A": "categorical", "B": "string"}, total - num_numerical), + ({"A": "categorical", "B": "string", "C": "numerical"}, total), ]: pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline( feat_type=feat_type ) self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) - # print(self.cs.get_default_configuration()) self.logger = logging.getLogger() meta_base = 
MetaBase(self.cs, data_dir, logger=self.logger) self.meta_optimizer = metalearner.MetaLearningOptimizer( "233", self.cs, meta_base, logger=self.logger ) - self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size) + self.assertEqual( + len(self.meta_optimizer.configuration_space), cs_size, feat_type + ) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py index 0cf5ee6bd2..8efdad5e6c 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py @@ -6,6 +6,11 @@ class PreprocessingPipelineFeatTypeTest(unittest.TestCase): + + num_numerical = 6 + num_categorical = 3 + num_text = 11 + def test_single_type(self): DPP = FeatTypeSplit(feat_type={"A": "numerical"}) cs = DPP.get_hyperparameter_search_space( @@ -22,7 +27,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) self.assertNotIn("categorical", key.split(":")[0]) - self.assertEqual(len(cs), 6) + self.assertEqual(len(cs), self.num_numerical) DPP = FeatTypeSplit(feat_type={"A": "categorical"}) cs = DPP.get_hyperparameter_search_space( @@ -39,7 +44,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) self.assertNotIn("numerical", key.split(":")[0]) - self.assertEqual(len(cs), 3) + self.assertEqual(len(cs), self.num_categorical) DPP = FeatTypeSplit(feat_type={"A": "string"}) cs = DPP.get_hyperparameter_search_space( @@ -56,7 +61,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) self.assertNotIn("categorical", key.split(":")[0]) - self.assertEqual(len(cs), 15) + self.assertEqual(len(cs), self.num_text) def 
test_dual_type(self): DPP = FeatTypeSplit(feat_type={"A": "numerical", "B": "categorical"}) @@ -73,7 +78,7 @@ def test_dual_type(self): ) for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) - self.assertEqual(len(cs), 9) + self.assertEqual(len(cs), self.num_numerical + self.num_categorical) DPP = FeatTypeSplit(feat_type={"A": "categorical", "B": "string"}) cs = DPP.get_hyperparameter_search_space( @@ -89,7 +94,7 @@ def test_dual_type(self): ) for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) - self.assertEqual(len(cs), 18) + self.assertEqual(len(cs), self.num_categorical + self.num_text) DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"}) cs = DPP.get_hyperparameter_search_space( @@ -105,7 +110,7 @@ def test_dual_type(self): ) for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("categorical", key.split(":")[0]) - self.assertEqual(len(cs), 21) + self.assertEqual(len(cs), self.num_text + self.num_numerical) def test_triple_type(self): DPP = FeatTypeSplit( @@ -132,4 +137,6 @@ def test_triple_type(self): truth_table[2] = True self.assertEqual(sum(truth_table), 3) - self.assertEqual(len(cs), 24) + self.assertEqual( + len(cs), self.num_numerical + self.num_categorical + self.num_text + ) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py index eed5b01bea..bc81efca48 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py @@ -1,150 +1,281 @@ import numpy as np import pandas as pd -from autosklearn.pipeline.components.data_preprocessing.text_encoding.bag_of_word_encoding import ( # noqa: E501 - BagOfWordEncoder as BOW, +from autosklearn.pipeline.components.data_preprocessing.text_encoding.tfidf_encoding import ( 
# noqa: E501 + TfidfEncoder as Vectorizer, ) -from autosklearn.pipeline.components.data_preprocessing.text_encoding.bag_of_word_encoding_distinct import ( # noqa: E501 - BagOfWordEncoder as BOW_distinct, + +import pytest + + +@pytest.mark.parametrize( + "analyzer,per_column", + [("word", True), ("word", False), ("char", True), ("char", False)], ) +def test_fit_transform(analyzer, per_column): + X = pd.DataFrame( + { + "col1": ["hello world", "hello mars"], + "col2": ["Test Test", "This is a test column"], + } + ).astype({"col1": "string", "col2": "string"}) + Vectorizer_fitted = Vectorizer( + analyzer=analyzer, + per_column=per_column, + random_state=1, + ).fit(X.copy()) + + if per_column: + for column in X.columns: + Yt = Vectorizer_fitted.preprocessor[column].vocabulary_ + if column == "col1": + if analyzer == "word": + words = sorted( + [ + "hello", + "world", + "mars", + ] + ) # is ignored by TFIDFVectorizer + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + else: + words = sorted( + [ + "hell", + "ello", + "llo ", + "lo w", + "o wo", + " wor", + "worl", + "orld", + "lo m", + "o ma", + " mar", + "mars", + ] + ) + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + elif column == "col2": + if analyzer == "word": + words = sorted( + [ + "test", + "this", + "is", # "a" is not added, len(...)=1, + "column", + ] + ) # is ignored by TFIDFVectorizer + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + else: + words = sorted( + [ + "test", + "est ", + "st t", + "t te", + " tes", + "this", + "his ", + "is i", + "s is", + " is ", + "is a", + "s a ", + " a t", + "a te", + "st c", + "t co", + " col", + "colu", + "olum", + "lumn", + ] + ) + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + else: + raise ValueError(column) + else: + Yt = Vectorizer_fitted.preprocessor.vocabulary_ + if analyzer == "word": + words = sorted( + [ + "hello", + "world", + "mars", + "test", + "this", + "is", # "a" is not added, 
len(...)=1, + "column", + ] + ) # is ignored by TFIDFVectorizer + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + else: + words = sorted( + [ + "hell", + "ello", + "llo ", + "lo w", + "o wo", + " wor", + "worl", + "orld", + "lo m", + "o ma", + " mar", + "mars", + "test", + "est ", + "st t", + "t te", + " tes", + "this", + "his ", + "is i", + "s is", + " is ", + "is a", + "s a ", + " a t", + "a te", + "st c", + "t co", + " col", + "colu", + "olum", + "lumn", + ] + ) + Y = {key: idx for idx, key in enumerate(words)} + assert Yt == Y + + +@pytest.mark.parametrize("per_column", [True, False]) +def test_transform(per_column): + X = pd.DataFrame( + { + "col1": ["hello world", "hello mars"], + "col2": ["Test Test", "This is a test column"], + } + ).astype({"col1": "string", "col2": "string"}) + vectorizer = Vectorizer( + per_column=per_column, + analyzer="word", + random_state=1, + ) + X_t = vectorizer.fit_transform(X.copy()) -import unittest - - -class TextPreprocessingPipelineTest(unittest.TestCase): - def test_fit_transform(self): - X = pd.DataFrame( - { - "col1": ["hello world", "This is a test"], - "col2": ["hello mars", "This is the second column"], - } - ).astype({"col1": "string", "col2": "string"}) - BOW_fitted = BOW( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit(X.copy()) - - Yt = BOW_fitted.preprocessor.vocabulary_ - words = sorted( + if per_column: + # ['hello', 'mars', 'world', 'column', 'is', 'test', 'this'] + y = np.array( [ - "hello", - "world", - "this", - "is", - "test", # "a" is not added, len(...)=1 - "mars", - "the", - "second", - "column", + [ + 0.57974, + 0.0, + 0.8148, + 0.0, + 0.0, + 1.0, + 0.0, + ], + [0.57974, 0.8148, 0.0, 0.53405, 0.53405, 0.37998, 0.53405], ] - ) # is ignored by CountVectorizer - Y = {key: idx for idx, key in enumerate(words)} - - np.testing.assert_array_equal(Yt, Y) - - BOW_fitted = BOW_distinct( - ngram_upper_bound=1, - 
min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit(X.copy()) - - for key in BOW_fitted.preprocessor: - y = [] - for col in X[key]: - y += [word for word in col.lower().split(" ") if len(word) > 1] - y = sorted(y) - yt = sorted(BOW_fitted.preprocessor[key].vocabulary_.keys()) - np.testing.assert_array_equal(yt, y) - - def test_transform(self): - X = pd.DataFrame( - { - "col1": ["hello world", "this is a test"], - "col2": ["hello mars", "this is the second column"], - } - ).astype({"col1": "string", "col2": "string"}) - X_t = BOW( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit_transform(X.copy()) - - # ['column', 'hello', 'is', 'mars', 'second', 'test', 'the', 'this', 'world'] - y = np.array([[0, 2, 0, 1, 0, 0, 0, 0, 1], [1, 0, 2, 0, 1, 1, 1, 2, 0]]) - np.testing.assert_array_equal(X_t.toarray(), y) - - X_t = BOW_distinct( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit_transform(X.copy()) - - # 'hello', 'is', 'test', 'this', 'world', - # 'column', 'hello', 'is', 'mars', 'second', 'the', 'this' + ) + np.testing.assert_almost_equal(X_t.toarray(), y, decimal=5) + else: + print(vectorizer.preprocessor.vocabulary_) + # 'column', 'hello', 'is', 'mars', 'test', 'this', 'world' y = np.array( - [[1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0], [0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1]] + [ + [0.0, 0.61913, 0.0, 0.0, 1.0, 0.0, 0.78529], + [0.52547, 0.61913, 0.52547, 0.78529, 0.41429, 0.52547, 0.0], + ] ) - np.testing.assert_array_equal(X_t.toarray(), y) - - def test_check_shape(self): - X = pd.DataFrame( - { - "col1": ["hello world", "this is test"], - "col2": ["test test", "test test"], - } - ).astype({"col1": "string", "col2": "string"}) - X_t = BOW( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, -
).fit_transform(X.copy()) - - self.assertEqual(X_t.shape, (2, 5)) - - X_t = BOW_distinct( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit_transform(X.copy()) - - self.assertEqual(X_t.shape, (2, 6)) - - def test_check_nan(self): - X = pd.DataFrame( - { - "col1": ["hello world", "this is test", None], - "col2": ["test test", "test test", "test"], - } - ).astype({"col1": "string", "col2": "string"}) - X_t = BOW( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit_transform(X.copy()) - - self.assertEqual(X_t.shape, (3, 5)) - - X_t = BOW_distinct( - ngram_upper_bound=1, - min_df_choice="min_df_absolute", - min_df_absolute=0, - min_df_relative=0, - random_state=1, - ).fit_transform(X.copy()) - - self.assertEqual(X_t.shape, (3, 6)) + np.testing.assert_almost_equal(X_t.toarray(), y, decimal=5) + + +def test_check_shape(): + X = pd.DataFrame( + { + "col1": ["hello world", "this is test"], + "col2": ["test test", "test test"], + } + ).astype({"col1": "string", "col2": "string"}) + X_t = Vectorizer( + per_column=True, + analyzer="word", + random_state=1, + ).fit_transform(X.copy()) + + assert X_t.shape == (2, 6) + + X_t = Vectorizer( + analyzer="word", + per_column=False, + random_state=1, + ).fit_transform(X.copy()) + + assert X_t.shape == (2, 5) + + +def test_check_nan(): + X = pd.DataFrame( + { + "col1": ["hello world", "this is test", None], + "col2": ["test test", "test test", "test"], + } + ).astype({"col1": "string", "col2": "string"}) + X_t = Vectorizer( + per_column=True, + analyzer="word", + random_state=1, + ).fit_transform(X.copy()) + assert X_t.shape == (3, 6) + + X_t = Vectorizer( + analyzer="word", + per_column=False, + random_state=1, + ).fit_transform(X.copy()) + assert X_t.shape == (3, 5) + + +def test_check_vocabulary(): + X = pd.DataFrame( + { + "col1": ["hello world", "this is test", None], + "col2": 
["test test", "test test", "test"], + } + ).astype({"col1": "string", "col2": "string"}) + vectorizer = Vectorizer( + per_column=True, + analyzer="word", + random_state=1, + ).fit(X.copy()) + assert vectorizer.preprocessor["col1"].vocabulary_ == { + "hello": 0, + "world": 4, + "this": 3, + "is": 1, + "test": 2, + } + assert vectorizer.preprocessor["col2"].vocabulary_ == {"test": 0} + + vectorizer = Vectorizer( + analyzer="word", + per_column=False, + random_state=1, + ).fit(X.copy()) + assert vectorizer.preprocessor.vocabulary_ == { + "hello": 0, + "world": 4, + "this": 3, + "is": 1, + "test": 2, + } diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index 9721f88e31..5b23cb37c9 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -669,7 +669,7 @@ def test_get_hyperparameter_search_space(self): * 7 rescaling choices * 16 classifier choices * 13 features preprocessor choices - * 183 total hyperparameters + * 179 total hyperparameters * (n_hyperparameters - 4) different conditionals for the pipeline * 53 forbidden combinations """ @@ -694,7 +694,7 @@ def test_get_hyperparameter_search_space(self): self.assertEqual(n_preprocessors, 13) hyperparameters = cs.get_hyperparameters() - self.assertEqual(len(hyperparameters), 183) + self.assertEqual(len(hyperparameters), 179) # for hp in sorted([str(h) for h in hyperparameters]): # print hp diff --git a/test/test_pipeline/test_regression.py b/test/test_pipeline/test_regression.py index 3a50decb8c..9f6d524173 100644 --- a/test/test_pipeline/test_regression.py +++ b/test/test_pipeline/test_regression.py @@ -320,7 +320,7 @@ def test_get_hyperparameter_search_space(self): conditions = cs.get_conditions() hyperparameters = cs.get_hyperparameters() forbiddens = cs.get_forbiddens() - self.assertEqual(171, len(hyperparameters)) + self.assertEqual(167, len(hyperparameters)) self.assertEqual(len(hyperparameters) - 3, len(conditions))
self.assertEqual(len(forbiddens), 35)