Skip to content

Commit

Permalink
Update pandas version to 0.24 (#451)
Browse files Browse the repository at this point in the history
* Update pandas version to 0.24

* pandas release notes: http://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html
* Update imports to match changes in pandas
* Add functionality for list of functions on `axis=1` for `apply`
* Remove `pd.match` from API
* A small regression in pandas requires a matching regression in Modin
  * pandas-dev/pandas#25101 reports this issue
  * pandas-dev/pandas#25102 resolves this issue
* TODO: Expose `pandas.Array` once it is properly tested

* Finishing regression update in `all`/`any`

* Update to pandas 0.24 in setup.py and requirements.txt

* Bump to 0.24.1

* Update API and add a test for the API

* Add test for API, update API

* Update API test and finalize compatibility updates

* Revert bug

* Cleanup and add tests

* Fix bug in test

* Lint

* Lint

* Remove print

* Fix transform tests and bug in transform

* Add list test for test_rename

* Fix transform bug
  • Loading branch information
devin-petersohn authored Feb 21, 2019
1 parent 226c705 commit 5b861c2
Show file tree
Hide file tree
Showing 11 changed files with 493 additions and 196 deletions.
4 changes: 3 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ exclude_lines =
pragma: no cover
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
raise NotImplementedError
raise ImportError
assert
9 changes: 9 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ matrix:
- black --check modin/
- flake8 .

- os: linux
dist: trusty
env:
- PYTHON=3.6
- API_COMPAT=1
script:
- export PATH="$HOME/miniconda/bin:$PATH"
- python -m pytest modin/pandas/test/test_api.py

install:
- ./.travis/install-dependencies.sh

Expand Down
42 changes: 29 additions & 13 deletions modin/data_management/query_compiler/pandas_query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
is_datetime_or_timedelta_dtype,
is_bool_dtype,
)
from pandas.core.index import _ensure_index
from pandas.core.index import ensure_index
from pandas.core.base import DataError

from modin.engines.base.block_partitions import BaseBlockPartitions
Expand Down Expand Up @@ -97,7 +97,7 @@ def pandas_index_extraction(df, axis):
return index_obj[new_indices] if compute_diff else new_indices

def _validate_set_axis(self, new_labels, old_labels):
new_labels = _ensure_index(new_labels)
new_labels = ensure_index(new_labels)
old_len = len(old_labels)
new_len = len(new_labels)
if old_len != new_len:
Expand All @@ -118,14 +118,14 @@ def _get_columns(self):

def _set_index(self, new_index):
if self._index_cache is None:
self._index_cache = _ensure_index(new_index)
self._index_cache = ensure_index(new_index)
else:
new_index = self._validate_set_axis(new_index, self._index_cache)
self._index_cache = new_index

def _set_columns(self, new_columns):
if self._columns_cache is None:
self._columns_cache = _ensure_index(new_columns)
self._columns_cache = ensure_index(new_columns)
else:
new_columns = self._validate_set_axis(new_columns, self._columns_cache)
self._columns_cache = new_columns
Expand Down Expand Up @@ -1388,11 +1388,16 @@ def _process_all_any(self, func, **kwargs):

if bool_only:
if axis == 0 and not axis_none and len(not_bool_col) == len(self.columns):
return pandas.Series(dtype=bool)
if len(not_bool_col) == len(self.columns):
query_compiler = self
else:
query_compiler = self.drop(columns=not_bool_col)
# TODO add this line back once pandas-dev/pandas#25101 is resolved
# return pandas.Series(dtype=bool)
pass
# See note above about pandas-dev/pandas#25101
# TODO remove this when pandas 0.24.2 is released.
query_compiler = self
# if len(not_bool_col) == len(self.columns):
# query_compiler = self
# else:
# query_compiler = self.drop(columns=not_bool_col)
else:
if (
bool_only is False
Expand Down Expand Up @@ -2492,11 +2497,22 @@ def _list_like_func(self, func, axis, *args, **kwargs):
Returns:
A new PandasQueryCompiler.
"""
func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
func_prepared = self._prepare_method(
lambda df: df.apply(func, axis, *args, **kwargs)
)
new_data = self._map_across_full_axis(axis, func_prepared)
# When the function is list-like, the function names become the index
new_index = [f if isinstance(f, string_types) else f.__name__ for f in func]
return self.__constructor__(new_data, new_index, self.columns)
# When the function is list-like, the function names become the index/columns
new_index = (
[f if isinstance(f, string_types) else f.__name__ for f in func]
if axis == 0
else self.index
)
new_columns = (
[f if isinstance(f, string_types) else f.__name__ for f in func]
if axis == 1
else self.columns
)
return self.__constructor__(new_data, new_index, new_columns)

def _callable_func(self, func, axis, *args, **kwargs):
"""Apply callable functions across given axis.
Expand Down
2 changes: 2 additions & 0 deletions modin/engines/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ def to_sql(
index_label=None,
chunksize=None,
dtype=None,
method=None,
):
ErrorMessage.default_to_pandas("`to_sql`")
df = qc.to_pandas()
Expand All @@ -447,4 +448,5 @@ def to_sql(
index_label=index_label,
chunksize=chunksize,
dtype=dtype,
method=method,
)
6 changes: 2 additions & 4 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
factorize,
test,
qcut,
match,
Panel,
date_range,
period_range,
Expand Down Expand Up @@ -64,7 +63,7 @@
from .plotting import Plotting as plotting
from .. import __execution_engine__ as execution_engine

__pandas_version__ = "0.23.4"
__pandas_version__ = "0.24.1"

if pandas.__version__ != __pandas_version__:
raise ImportError(
Expand Down Expand Up @@ -131,7 +130,7 @@ def initialize_ray():
if execution_engine == "Ray":
initialize_ray()
num_cpus = ray.global_state.cluster_resources()["CPU"]
elif execution_engine == "Dask":
elif execution_engine == "Dask": # pragma: no cover
from distributed.client import _get_global_client

if threading.current_thread().name == "MainThread":
Expand Down Expand Up @@ -174,7 +173,6 @@ def initialize_ray():
"factorize",
"test",
"qcut",
"match",
"to_datetime",
"get_dummies",
"isna",
Expand Down
Loading

0 comments on commit 5b861c2

Please sign in to comment.