Merge pull request #998 from automl/development

Release 0.11
automl · Nov 6, 2020 · 7efc5e2 · 7efc5e2
2 parents 7a3f3a5 + 9e04bd8
commit 7efc5e2
Show file tree

Hide file tree

Showing 625 changed files with 60,818 additions and 63,014 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -23,9 +23,9 @@ matrix:
   - os: linux
     env: DISTRIB="conda" DOCPUSH="true" PYTHON="3.7" SKIP_TESTS="true"
   - os: linux
-    env: DISTRIB="conda" RUN_FLAKE8="true" SKIP_TESTS="true"
+    env: DISTRIB="conda" PYTHON="3.8" RUN_FLAKE8="true" SKIP_TESTS="true"
   - os: linux
-    env: DISTRIB="conda" RUN_MYPY="true" SKIP_TESTS="true"
+    env: DISTRIB="conda" PYTHON="3.8" RUN_MYPY="true" SKIP_TESTS="true"
   - os: linux
     env: DISTRIB="conda" COVERAGE="true" PYTHON="3.6"
   - os: linux

diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py
@@ -1,4 +1,4 @@
 """Version information."""
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.10.0"
+__version__ = "0.11.0"
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
diff --git a/autosklearn/data/abstract_data_manager.py b/autosklearn/data/abstract_data_manager.py
@@ -75,34 +75,6 @@ def encoder(self) -> DataPreprocessor:
     def encoder(self, value: DataPreprocessor) -> DataPreprocessor:
         self._encoder = value
 
-    def perform1HotEncoding(self) -> None:
-        sparse = True if self.info['is_sparse'] == 1 else False
-        has_missing = True if self.info['has_missing'] else False
-        to_encode = ['categorical']
-        if has_missing:
-            to_encode += ['binary']
-        encoding_mask = [feat_type.lower() in to_encode
-                         for feat_type in self.feat_type]
-
-        data = [self.data['X_train']]
-        if 'X_valid' in self.data:
-            data.append(self.data['X_valid'])
-        if 'X_test' in self.data:
-            data.append(self.data['X_test'])
-        data, sparse = perform_one_hot_encoding(
-            sparse=sparse, categorical=encoding_mask,
-            data=data)
-
-        self.info['is_sparse'] = 1 if sparse else 0
-        self.data['X_train'] = data[0]
-        if 'X_valid' in self.data and 'X_test' in self.data:
-            self.data['X_valid'] = data[1]
-            self.data['X_test'] = data[2]
-        elif 'X_valid' in self.data:
-            self.data['X_valid'] = data[1]
-        elif 'X_test' in self.data:
-            self.data['X_test'] = data[1]
-
     def __repr__(self) -> str:
         return 'DataManager : ' + self.name
 

diff --git a/autosklearn/data/validation.py b/autosklearn/data/validation.py
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 
+import functools
 import warnings
 from typing import List, Optional, Tuple, Union
 
@@ -365,6 +366,25 @@ def _check_and_encode_features(
                 assert self.feature_encoder is not None
                 self.feature_encoder.fit(X)
 
+                # The column transformer reorders the feature types - we therefore need to change
+                # it as well
+                def comparator(cmp1, cmp2):
+                    if (
+                        cmp1 == 'categorical' and cmp2 == 'categorical'
+                        or cmp1 == 'numerical' and cmp2 == 'numerical'
+                    ):
+                        return 0
+                    elif cmp1 == 'categorical' and cmp2 == 'numerical':
+                        return -1
+                    elif cmp1 == 'numerical' and cmp2 == 'categorical':
+                        return 1
+                    else:
+                        raise ValueError((cmp1, cmp2))
+                self.feature_types = sorted(
+                    self.feature_types,
+                    key=functools.cmp_to_key(comparator)
+                )
+
         if self.feature_encoder:
             try:
                 X = self.feature_encoder.transform(X)