From 34f12654bce2c1cbeea90a0fcb2e0395824861d9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 30 Dec 2017 17:48:01 -0500 Subject: [PATCH] COMPAT: clean up warnings (#19003) --- appveyor.yml | 1 + doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/config.py | 4 +- pandas/core/dtypes/missing.py | 4 + pandas/tests/frame/test_constructors.py | 12 +- pandas/tests/frame/test_query_eval.py | 2 +- pandas/tests/groupby/test_nth.py | 2 +- pandas/tests/groupby/test_transform.py | 2 +- .../indexes/datetimes/test_date_range.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 8 +- .../tests/indexes/interval/test_interval.py | 2 +- pandas/tests/indexing/test_multiindex.py | 2 +- pandas/tests/io/formats/test_to_html.py | 3 +- pandas/tests/io/formats/test_to_latex.py | 4 +- pandas/tests/io/test_parquet.py | 6 +- pandas/tests/plotting/test_misc.py | 1 + pandas/tests/reshape/merge/test_merge_asof.py | 2 +- pandas/tests/reshape/test_concat.py | 4 +- pandas/tests/scalar/test_interval.py | 10 +- pandas/tests/series/test_missing.py | 5 +- pandas/tests/sparse/frame/test_frame.py | 164 ----------------- .../tests/sparse/frame/test_to_from_scipy.py | 168 ++++++++++++++++++ pandas/tests/test_algos.py | 8 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/util/test_testing.py | 18 +- 25 files changed, 227 insertions(+), 212 deletions(-) create mode 100644 pandas/tests/sparse/frame/test_to_from_scipy.py diff --git a/appveyor.yml b/appveyor.yml index 0aaac322c4ac7..ba001208864a8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -15,6 +15,7 @@ environment: # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" clone_folder: C:\projects\pandas + PANDAS_TESTING_MODE: "deprecate" matrix: diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 92564285bb36a..6407a33c442d0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -207,6 +207,7 @@ Other API Changes - :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) - In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) +- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/config.py b/pandas/core/config.py index d10e2d19be665..692aed178719d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -613,7 +613,7 @@ def _warn_if_deprecated(key): if d: if d.msg: print(d.msg) - warnings.warn(d.msg, DeprecationWarning) + warnings.warn(d.msg, FutureWarning) else: msg = "'{key}' is deprecated".format(key=key) if d.removal_ver: @@ -624,7 +624,7 @@ def _warn_if_deprecated(key): else: msg += ', please refrain from using it.' - warnings.warn(msg, DeprecationWarning) + warnings.warn(msg, FutureWarning) return True return False diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d208c72ffee19..ffac702476af1 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -316,6 +316,10 @@ def array_equivalent(left, right, strict_nan=False): # NaNs can occur in float and complex arrays. if is_float_dtype(left) or is_complex_dtype(left): + + # empty + if not (np.prod(left.shape) and np.prod(right.shape)): + return True return ((left == right) | (isna(left) & isna(right))).all() # numpy will will not allow this type of datetimelike vs integer comparison diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8be6c4875ae24..b7d3a60ecf6e4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -328,13 +328,13 @@ def test_constructor_error_msgs(self): # wrong size axis labels with tm.assert_raises_regex(ValueError, "Shape of passed values " - "is \(3, 2\), indices " - "imply \(3, 1\)"): + r"is \(3, 2\), indices " + r"imply \(3, 1\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) with tm.assert_raises_regex(ValueError, "Shape of passed values " - "is \(3, 2\), indices " - "imply \(2, 2\)"): + r"is \(3, 2\), indices " + r"imply \(2, 2\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) with tm.assert_raises_regex(ValueError, "If using all scalar " @@ -1220,12 +1220,12 @@ def test_constructor_from_items(self): def test_constructor_from_items_scalars(self): # GH 17312 with tm.assert_raises_regex(ValueError, - 'The value in each \(key, value\) ' + r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 4)]) with tm.assert_raises_regex(ValueError, - 'The value in each \(key, value\) ' + r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], orient='index') diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 22066d59cf14d..55aeaf6e77be1 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1040,6 +1040,6 @@ def test_invalid_type_for_operator_raises(self, parser, engine): ops = '+', '-', '*', '/' for op in ops: with tm.assert_raises_regex(TypeError, - "unsupported operand type\(s\) " + r"unsupported operand type\(s\) " "for .+: '.+' and '.+'"): df.eval('a {0} b'.format(op), engine=engine, parser=parser) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 2a408b85f0ed1..ccde545b5b8e9 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -175,7 +175,7 @@ def test_nth(self): df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) g = df.groupby('A') # PR 17493, related to issue 11038 - # test Series.nth with True for dropna produces DeprecationWarning + # test Series.nth with True for dropna produces FutureWarning with assert_produces_warning(FutureWarning): result = g.B.nth(0, dropna=True) expected = g.B.first() diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index c0ea968ab0819..8f72da293a50c 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -261,7 +261,7 @@ def test_transform_casting(self): 9 B-053 b76cd912ff "2014-10-08 19:17:48" 10 B-065 b76cd912ff "2014-10-08 19:21:38" """ - df = pd.read_csv(StringIO(data), sep='\s+', + df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=[0], parse_dates=['DATETIME']) result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff()) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 3ce51983c111d..3738398d017f8 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -402,7 +402,7 @@ def test_daterange_bug_456(self): assert isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): - msg = 'Offset <0 \* MonthEnds> did not increment date' + msg = r'Offset <0 \* MonthEnds> did not increment date' with tm.assert_raises_regex(ValueError, msg): date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f94a438fcdaa5..44f3c21d23e62 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -709,7 +709,7 @@ def test_dataframe(self, cache): 'day': [4, 5]}) msg = ("cannot assemble the datetimes: time data .+ does not " - "match format '%Y%m%d' \(match\)") + r"match format '%Y%m%d' \(match\)") with tm.assert_raises_regex(ValueError, msg): to_datetime(df2, cache=cache) result = to_datetime(df2, errors='coerce', cache=cache) @@ -719,15 +719,15 @@ def test_dataframe(self, cache): # extra columns msg = ("extra keys have been passed to the datetime assemblage: " - "\[foo\]") + r"\[foo\]") with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 to_datetime(df2, cache=cache) # not enough - msg = ('to assemble mappings requires at least that \[year, month, ' - 'day\] be specified: \[.+\] is missing') + msg = (r'to assemble mappings requires at least that \[year, month, ' + r'day\] be specified: \[.+\] is missing') for c in [['year'], ['year', 'month'], ['year', 'month', 'second'], diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 3ca4c31b7f059..4805c957907e6 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -175,7 +175,7 @@ def test_constructors_empty(self, data, closed): def test_constructors_errors(self): # scalar - msg = ('IntervalIndex\(...\) must be called with a collection of ' + msg = (r'IntervalIndex\(...\) must be called with a collection of ' 'some kind, 5 was passed') with tm.assert_raises_regex(TypeError, msg): IntervalIndex(5) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index d89c64fc5b9f8..f69b9d98143b0 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -299,7 +299,7 @@ def test_getitem_partial_int(self): # missing item: with tm.assert_raises_regex(KeyError, '1'): df[1] - with tm.assert_raises_regex(KeyError, "'\[1\] not in index'"): + with tm.assert_raises_regex(KeyError, r"'\[1\] not in index'"): df[[1]] def test_loc_multiindex_indexer_none(self): diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index b263d368f41f5..9e063c2d176e1 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1411,8 +1411,9 @@ def test_to_html_border_zero(self): result = df.to_html(border=0) assert 'border="0"' in result + @tm.capture_stdout def test_display_option_warning(self): - with tm.assert_produces_warning(DeprecationWarning, + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.options.html.border diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 2f8ef32722051..f266a8b3a3268 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -424,11 +424,11 @@ def test_to_latex_longtable(self, frame): df = DataFrame({'a': [1, 2]}) with1column_result = df.to_latex(index=False, longtable=True) - assert "\multicolumn{1}" in with1column_result + assert r"\multicolumn{1}" in with1column_result df = DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) with3columns_result = df.to_latex(index=False, longtable=True) - assert "\multicolumn{3}" in with3columns_result + assert r"\multicolumn{3}" in with3columns_result def test_to_latex_escape_special_chars(self): special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index c59acbd946f91..31c2ded49b7a0 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from pandas.compat import PY3, is_platform_windows +from pandas.compat import PY3, is_platform_windows, is_platform_mac from pandas.io.parquet import (to_parquet, read_parquet, get_engine, PyArrowImpl, FastParquetImpl) from pandas.util import testing as tm @@ -174,8 +174,8 @@ def test_options_get_engine(fp, pa): assert isinstance(get_engine('fastparquet'), FastParquetImpl) -@pytest.mark.xfail(is_platform_windows(), - reason="reading pa metadata failing on Windows") +@pytest.mark.xfail(is_platform_windows() or is_platform_mac(), + reason="reading pa metadata failing on Windows/mac") def test_cross_engine_pa_fp(df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 60ed280bc050e..9e538ae130a85 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -206,6 +206,7 @@ def test_parallel_coordinates(self): def test_parallel_coordinates_with_sorted_labels(self): """ For #15908 """ from pandas.plotting import parallel_coordinates + df = DataFrame({"feat": [i for i in range(30)], "class": [2 for _ in range(10)] + [3 for _ in range(10)] + diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 4b2680b9be592..2f48aef1894a9 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -976,7 +976,7 @@ def test_on_float_by_int(self): def test_merge_datatype_error(self): """ Tests merge datatype mismatch error """ - msg = 'merge keys \[0\] object and int64, must be the same type' + msg = r'merge keys \[0\] object and int64, must be the same type' left = pd.DataFrame({'left_val': [1, 5, 10], 'a': ['a', 'b', 'c']}) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index bdbf2a0ee2f68..f66cb12b11210 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -178,9 +178,9 @@ def test_concatlike_same_dtypes(self): tm.assert_series_equal(res, exp, check_index_type=True) # cannot append non-index - msg = ('cannot concatenate object of type \"(.+?)\";' + msg = (r'cannot concatenate object of type \"(.+?)\";' ' only pd.Series, pd.DataFrame, and pd.Panel' - ' \(deprecated\) objs are valid') + r' \(deprecated\) objs are valid') with tm.assert_raises_regex(TypeError, msg): pd.Series(vals1).append(vals2) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index 3db474e32c4dd..23dad9736dac5 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -122,7 +122,7 @@ def test_math_add(self, interval): actual += 1 assert expected == actual - msg = "unsupported operand type\(s\) for \+" + msg = r"unsupported operand type\(s\) for \+" with tm.assert_raises_regex(TypeError, msg): interval + Interval(1, 2) @@ -138,7 +138,7 @@ def test_math_sub(self, interval): actual -= 1 assert expected == actual - msg = "unsupported operand type\(s\) for -" + msg = r"unsupported operand type\(s\) for -" with tm.assert_raises_regex(TypeError, msg): interval - Interval(1, 2) @@ -158,11 +158,11 @@ def test_math_mult(self, interval): actual *= 2 assert expected == actual - msg = "unsupported operand type\(s\) for \*" + msg = r"unsupported operand type\(s\) for \*" with tm.assert_raises_regex(TypeError, msg): interval * Interval(1, 2) - msg = "can\'t multiply sequence by non-int" + msg = r"can\'t multiply sequence by non-int" with tm.assert_raises_regex(TypeError, msg): interval * 'foo' @@ -175,7 +175,7 @@ def test_math_div(self, interval): actual /= 2.0 assert expected == actual - msg = "unsupported operand type\(s\) for /" + msg = r"unsupported operand type\(s\) for /" with tm.assert_raises_regex(TypeError, msg): interval / Interval(1, 2) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8c6a4fcf4b1d4..0dc5e23184af7 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -480,12 +480,9 @@ def test_isna_for_inf(self): def test_isnull_for_inf_deprecated(self): # gh-17115 s = Series(['a', np.inf, np.nan, 1.0]) - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - pd.set_option('mode.use_inf_as_null', True) + with pd.option_context('mode.use_inf_as_null', True): r = s.isna() dr = s.dropna() - pd.reset_option('mode.use_inf_as_null') e = Series([False, True, True, False]) de = Series(['a', 1.0], index=[0, 3]) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index cf002ff046c2e..058892e3b85ff 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -7,21 +7,14 @@ from numpy import nan import numpy as np import pandas as pd -from distutils.version import LooseVersion from pandas import Series, DataFrame, bdate_range, Panel -from pandas.core.dtypes.common import ( - is_bool_dtype, - is_float_dtype, - is_object_dtype, - is_float) from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay from pandas.util import testing as tm from pandas.compat import lrange from pandas import compat from pandas.core.sparse import frame as spf -import pandas.util._test_decorators as td from pandas._libs.sparse import BlockIndex, IntIndex from pandas.core.sparse.api import SparseSeries, SparseDataFrame, SparseArray @@ -1171,163 +1164,6 @@ def test_notna(self): tm.assert_frame_equal(res.to_dense(), exp) -@td.skip_if_no_scipy -@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 -@pytest.mark.parametrize('columns', [None, list('def')]) -@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) -@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) -def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): - # GH 4343 - # Make one ndarray and from it one sparse matrix, both to be used for - # constructing frames and comparing results - arr = np.eye(3, dtype=dtype) - # GH 16179 - arr[0, 1] = dtype(2) - try: - spm = spmatrix(arr) - assert spm.dtype == arr.dtype - except (TypeError, AssertionError): - # If conversion to sparse fails for this spmatrix type and arr.dtype, - # then the combination is not currently supported in NumPy, so we - # can just skip testing it thoroughly - return - - sdf = pd.SparseDataFrame(spm, index=index, columns=columns, - default_fill_value=fill_value) - - # Expected result construction is kind of tricky for all - # dtype-fill_value combinations; easiest to cast to something generic - # and except later on - rarr = arr.astype(object) - rarr[arr == 0] = np.nan - expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( - fill_value if fill_value is not None else np.nan) - - # Assert frame is as expected - sdf_obj = sdf.astype(object) - tm.assert_sp_frame_equal(sdf_obj, expected) - tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) - - # Assert spmatrices equal - assert dict(sdf.to_coo().todok()) == dict(spm.todok()) - - # Ensure dtype is preserved if possible - was_upcast = ((fill_value is None or is_float(fill_value)) and - not is_object_dtype(dtype) and - not is_float_dtype(dtype)) - res_dtype = (bool if is_bool_dtype(dtype) else - float if was_upcast else - dtype) - tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) - assert sdf.to_coo().dtype == res_dtype - - # However, adding a str column results in an upcast to object - sdf['strings'] = np.arange(len(sdf)).astype(str) - assert sdf.to_coo().dtype == np.object_ - - -@td.skip_if_no_scipy -@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 -def test_from_to_scipy_object(spmatrix, fill_value): - # GH 4343 - dtype = object - columns = list('cd') - index = list('ab') - import scipy - if (spmatrix is scipy.sparse.dok_matrix and LooseVersion( - scipy.__version__) >= LooseVersion('0.19.0')): - pytest.skip("dok_matrix from object does not work in SciPy >= 0.19") - - # Make one ndarray and from it one sparse matrix, both to be used for - # constructing frames and comparing results - arr = np.eye(2, dtype=dtype) - try: - spm = spmatrix(arr) - assert spm.dtype == arr.dtype - except (TypeError, AssertionError): - # If conversion to sparse fails for this spmatrix type and arr.dtype, - # then the combination is not currently supported in NumPy, so we - # can just skip testing it thoroughly - return - - sdf = pd.SparseDataFrame(spm, index=index, columns=columns, - default_fill_value=fill_value) - - # Expected result construction is kind of tricky for all - # dtype-fill_value combinations; easiest to cast to something generic - # and except later on - rarr = arr.astype(object) - rarr[arr == 0] = np.nan - expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( - fill_value if fill_value is not None else np.nan) - - # Assert frame is as expected - sdf_obj = sdf.astype(object) - tm.assert_sp_frame_equal(sdf_obj, expected) - tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) - - # Assert spmatrices equal - assert dict(sdf.to_coo().todok()) == dict(spm.todok()) - - # Ensure dtype is preserved if possible - res_dtype = object - tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) - assert sdf.to_coo().dtype == res_dtype - - -@td.skip_if_no_scipy -def test_from_scipy_correct_ordering(spmatrix): - # GH 16179 - arr = np.arange(1, 5).reshape(2, 2) - try: - spm = spmatrix(arr) - assert spm.dtype == arr.dtype - except (TypeError, AssertionError): - # If conversion to sparse fails for this spmatrix type and arr.dtype, - # then the combination is not currently supported in NumPy, so we - # can just skip testing it thoroughly - return - - sdf = pd.SparseDataFrame(spm) - expected = pd.SparseDataFrame(arr) - tm.assert_sp_frame_equal(sdf, expected) - tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) - - -@td.skip_if_no_scipy -def test_from_scipy_fillna(spmatrix): - # GH 16112 - arr = np.eye(3) - arr[1:, 0] = np.nan - - try: - spm = spmatrix(arr) - assert spm.dtype == arr.dtype - except (TypeError, AssertionError): - # If conversion to sparse fails for this spmatrix type and arr.dtype, - # then the combination is not currently supported in NumPy, so we - # can just skip testing it thoroughly - return - - sdf = pd.SparseDataFrame(spm).fillna(-1.0) - - # Returning frame should fill all nan values with -1.0 - expected = pd.SparseDataFrame({ - 0: pd.SparseSeries([1., -1, -1]), - 1: pd.SparseSeries([np.nan, 1, np.nan]), - 2: pd.SparseSeries([np.nan, np.nan, 1]), - }, default_fill_value=-1) - - # fill_value is expected to be what .fillna() above was called with - # We don't use -1 as initial fill_value in expected SparseSeries - # construction because this way we obtain "compressed" SparseArrays, - # avoiding having to construct them ourselves - for col in expected: - expected[col].fill_value = -1 - - tm.assert_sp_frame_equal(sdf, expected) - - class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self): diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py new file mode 100644 index 0000000000000..aef49c84fc2ad --- /dev/null +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -0,0 +1,168 @@ +import pytest +import numpy as np +from warnings import catch_warnings +from pandas.util import testing as tm +from pandas import SparseDataFrame, SparseSeries +from distutils.version import LooseVersion +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_object_dtype, + is_float) + + +scipy = pytest.importorskip('scipy') + + +@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('def')]) +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) +@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) +def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): + # GH 4343 + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(3, dtype=dtype) + # GH 16179 + arr[0, 1] = dtype(2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) + + # Ensure dtype is preserved if possible + was_upcast = ((fill_value is None or is_float(fill_value)) and + not is_object_dtype(dtype) and + not is_float_dtype(dtype)) + res_dtype = (bool if is_bool_dtype(dtype) else + float if was_upcast else + dtype) + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + assert sdf.to_coo().dtype == res_dtype + + # However, adding a str column results in an upcast to object + sdf['strings'] = np.arange(len(sdf)).astype(str) + assert sdf.to_coo().dtype == np.object_ + + +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +def test_from_to_scipy_object(spmatrix, fill_value): + # GH 4343 + dtype = object + columns = list('cd') + index = list('ab') + + if (spmatrix is scipy.sparse.dok_matrix and LooseVersion( + scipy.__version__) >= LooseVersion('0.19.0')): + pytest.skip("dok_matrix from object does not work in SciPy >= 0.19") + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + with catch_warnings(record=True): + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) + + # Ensure dtype is preserved if possible + res_dtype = object + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + assert sdf.to_coo().dtype == res_dtype + + +def test_from_scipy_correct_ordering(spmatrix): + # GH 16179 + arr = np.arange(1, 5).reshape(2, 2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm) + expected = SparseDataFrame(arr) + tm.assert_sp_frame_equal(sdf, expected) + tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + + +def test_from_scipy_fillna(spmatrix): + # GH 16112 + arr = np.eye(3) + arr[1:, 0] = np.nan + + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm).fillna(-1.0) + + # Returning frame should fill all nan values with -1.0 + expected = SparseDataFrame({ + 0: SparseSeries([1., -1, -1]), + 1: SparseSeries([np.nan, 1, np.nan]), + 2: SparseSeries([np.nan, np.nan, 1]), + }, default_fill_value=-1) + + # fill_value is expected to be what .fillna() above was called with + # We don't use -1 as initial fill_value in expected SparseSeries + # construction because this way we obtain "compressed" SparseArrays, + # avoiding having to construct them ourselves + for col in expected: + expected[col].fill_value = -1 + + tm.assert_sp_frame_equal(sdf, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d7fc5033bab90..6b3b519d49f7f 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -788,10 +788,10 @@ def test_duplicated_with_nas(self): 2, 4, 1, 5, 6]), np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]), - pytest.mark.xfail(reason="Complex bug. GH 16399")( - np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, - 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]) - ), + pytest.param(np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, + 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]), + marks=pytest.mark.xfail(reason="Complex bug. GH 16399") + ), np.array(['a', 'b', 'a', 'e', 'c', 'b', 'd', 'a', 'e', 'f'], dtype=object), np.array([1, 2**63, 1, 3**5, 10, 2**63, 39, 1, 3**5, 7], diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 86d9a9fa91e47..424ba6aab9a56 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2289,7 +2289,7 @@ def test_reset_index_multiindex_columns(self): # gh-16120: already existing column with tm.assert_raises_regex(ValueError, - ("cannot insert \('A', ''\), " + (r"cannot insert \('A', ''\), " "already exists")): df.rename_axis('A').reset_index() @@ -2323,7 +2323,7 @@ def test_reset_index_multiindex_columns(self): # ... which is incompatible with col_fill=None with tm.assert_raises_regex(ValueError, ("col_fill=None is incompatible with " - "incomplete column name \('C', 'c'\)")): + r"incomplete column name \('C', 'c'\)")): df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) # with col_level != 0 diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index 31580bc9eab57..1c878604b11a2 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -48,12 +48,18 @@ def test_assert_almost_equal_numbers_with_mixed(self): self._assert_not_almost_equal_both(1, [1, ]) self._assert_not_almost_equal_both(1, object()) - def test_assert_almost_equal_edge_case_ndarrays(self): - self._assert_almost_equal_both(np.array([], dtype='M8[ns]'), - np.array([], dtype='float64'), - check_dtype=False) - self._assert_almost_equal_both(np.array([], dtype=str), - np.array([], dtype='int64'), + @pytest.mark.parametrize( + "left_dtype", + ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object']) + @pytest.mark.parametrize( + "right_dtype", + ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object']) + def test_assert_almost_equal_edge_case_ndarrays( + self, left_dtype, right_dtype): + + # empty compare + self._assert_almost_equal_both(np.array([], dtype=left_dtype), + np.array([], dtype=right_dtype), check_dtype=False) def test_assert_almost_equal_dicts(self):