From eb5592887f93ae89c6a01e106fdad6034084aab8 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 18:56:17 +0000 Subject: [PATCH 01/23] update test_parquet since fastparquet now handles tz --- pandas/tests/io/test_parquet.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 8a6a22abe23fa..fa849690f4fce 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -486,10 +486,9 @@ def test_datetime_tz(self, fp): # doesn't preserve tz df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, tz='US/Eastern')}) - # warns on the coercion with catch_warnings(record=True): - check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) + check_round_trip(df, fp) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))} From 2f4fc0790a5e5c51eb80bbfadf4c80f0bb424c56 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 18:57:29 +0000 Subject: [PATCH 02/23] bring back dtype kwarg because it is needed for DatetimeTZBlock --- pandas/core/internals.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c2d3d0852384c..270f9e71b3ae9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -206,7 +206,7 @@ def array_dtype(self): """ return self.dtype - def make_block(self, values, placement=None, ndim=None): + def make_block(self, values, placement=None, ndim=None, dtype=None): """ Create a new block, with type inference propagate any values that are not specified @@ -216,7 +216,7 @@ def make_block(self, values, placement=None, ndim=None): if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim) + return make_block(values, placement=placement, ndim=ndim, dtype=dtype) def make_block_scalar(self, values): """ @@ -224,12 +224,13 @@ def make_block_scalar(self, values): """ return ScalarBlock(values) - def
make_block_same_class(self, values, placement=None, ndim=None): + def make_block_same_class(self, values, placement=None, ndim=None, + dtype=None): """ Wrap given values in a block of same type as self. """ if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, - klass=self.__class__) + klass=self.__class__, dtype=dtype) def __unicode__(self): From e18f8e699d7b97025efe62e70a167f1d4d6e9a1d Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 20:27:42 +0000 Subject: [PATCH 03/23] version dependence test_datetime_tz --- pandas/tests/io/test_parquet.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index fa849690f4fce..0e956a228163f 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -483,9 +483,15 @@ def test_categorical(self, fp): check_round_trip(df, fp) def test_datetime_tz(self, fp): - # doesn't preserve tz - df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, - tz='US/Eastern')}) + + # generic test data + df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3)}) + + # fastparquet supports timezone since 0.1.4, and not before + import fastparquet + if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): + df['dt_tz'] = pd.date_range('20130101', periods=3, tz='US/Eastern') + # warns on the coercion with catch_warnings(record=True): check_round_trip(df, fp) From 9a07d97369456b85ef05d75055d019f27eaab7ff Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 21:16:42 +0000 Subject: [PATCH 04/23] separate test cases for new and old behaviour of fastparquet --- pandas/tests/io/test_parquet.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 0e956a228163f..8c313c28b467d 100644 --- a/pandas/tests/io/test_parquet.py +++ 
b/pandas/tests/io/test_parquet.py @@ -448,6 +448,11 @@ class TestParquetFastParquet(Base): def test_basic(self, fp, df_full): df = df_full + # additional supported types for fastparquet >= 0.1.4 + if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') + # additional supported types for fastparquet df['timedelta'] = pd.timedelta_range('1 day', periods=3) @@ -482,19 +487,14 @@ def test_categorical(self, fp): df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) check_round_trip(df, fp) - def test_datetime_tz(self, fp): - - # generic test data - df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3)}) - - # fastparquet supports timezone since 0.1.4, and not before - import fastparquet - if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): - df['dt_tz'] = pd.date_range('20130101', periods=3, tz='US/Eastern') - - # warns on the coercion - with catch_warnings(record=True): - check_round_trip(df, fp) + def test_datetime_tz_old(self, fp): + if LooseVersion(fastparquet.__version__) < LooseVersion('0.1.4'): + # fastparquet<0.1.4 doesn't preserve tz + df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, + tz='US/Eastern')}) + # warns on the coercion + with catch_warnings(record=True): + check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))} From 3d9810d2f0b4b8327100bec585ba44858b761a47 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 21:20:49 +0000 Subject: [PATCH 05/23] tidy test_datetime_tz to test old behaviour of fastparquet<0.1.4 --- pandas/tests/io/test_parquet.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 8c313c28b467d..d4ef50cad8990 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -487,14 +487,16 @@ def
test_categorical(self, fp): df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) check_round_trip(df, fp) - def test_datetime_tz_old(self, fp): - if LooseVersion(fastparquet.__version__) < LooseVersion('0.1.4'): - # fastparquet<0.1.4 doesn't preserve tz - df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, - tz='US/Eastern')}) - # warns on the coercion - with catch_warnings(record=True): - check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) + def test_datetime_tz(self, fp): + if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): + pytest.skip("timezone not supported for older fp") + + # fastparquet<0.1.4 doesn't preserve tz + df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, + tz='US/Eastern')}) + # warns on the coercion + with catch_warnings(record=True): + check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))} From ee75fdff87f201f41724b5b9b757758370c776b5 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 21:24:42 +0000 Subject: [PATCH 06/23] rephrase reason to skip test case for older fp --- pandas/tests/io/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index d4ef50cad8990..682e63fcb3513 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -489,7 +489,7 @@ def test_categorical(self, fp): def test_datetime_tz(self, fp): if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): - pytest.skip("timezone not supported for older fp") + pytest.skip("timezones supported in newer versions of fp") # fastparquet<0.1.4 doesn't preserve tz df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, From 68d6324d99386577712db01f58cbac2965fdf63c Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 21:54:50 +0000 Subject: [PATCH 07/23] follow pytest fixture pattern as in pyarrow ---
pandas/tests/io/test_parquet.py | 38 ++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 682e63fcb3513..6bd6432359bdc 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -71,6 +71,24 @@ def fp(): return 'fastparquet' +@pytest.fixture +def fp_lt_014(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + pytest.skip("fastparquet is >= 0.1.4") + return 'fastparquet' + + +@pytest.fixture +def fp_ge_014(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + if LooseVersion(fastparquet.__version__) < LooseVersion('0.1.4'): + pytest.skip("fastparquet is < 0.1.4") + return 'fastparquet' + + @pytest.fixture def df_compat(): return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) @@ -445,18 +463,13 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa): class TestParquetFastParquet(Base): - def test_basic(self, fp, df_full): + def test_basic(self, fp_ge_014, df_full): df = df_full - # additional supported types for fastparquet >= 0.1.4 - if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): - df['datetime_tz'] = pd.date_range('20130101', periods=3, - tz='US/Eastern') - - # additional supported types for fastparquet + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') df['timedelta'] = pd.timedelta_range('1 day', periods=3) - - check_round_trip(df, fp) + check_round_trip(df, fp_ge_014) @pytest.mark.skip(reason="not supported") def test_duplicate_columns(self, fp): @@ -487,16 +500,15 @@ def test_categorical(self, fp): df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) check_round_trip(df, fp) - def test_datetime_tz(self, fp): - if LooseVersion(fastparquet.__version__) > LooseVersion('0.1.3'): - pytest.skip("timezones supported in newer versions of fp") + def 
test_datetime_tz(self, fp_lt_014): # fastparquet<0.1.4 doesn't preserve tz df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, tz='US/Eastern')}) # warns on the coercion with catch_warnings(record=True): - check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) + check_round_trip(df, fp_lt_014, + expected=df.astype('datetime64[ns]')) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))} From 985081d40e5897ae6c61d9d57352aea947db36c0 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:05:16 +0000 Subject: [PATCH 08/23] follow pyarrow test_basic style for fastparquet new behaviour>=0.1.4 --- pandas/tests/io/test_parquet.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6bd6432359bdc..bf51e7b306aeb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -463,13 +463,15 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa): class TestParquetFastParquet(Base): - def test_basic(self, fp_ge_014, df_full): + def test_basic(self, fp, df_full): df = df_full - df['datetime_tz'] = pd.date_range('20130101', periods=3, - tz='US/Eastern') + # additional supported types for fastparquet>=0.1.4 + if LooseVersion(pyarrow.__version__) >= LooseVersion('0.1.4'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') df['timedelta'] = pd.timedelta_range('1 day', periods=3) - check_round_trip(df, fp_ge_014) + check_round_trip(df, fp) @pytest.mark.skip(reason="not supported") def test_duplicate_columns(self, fp): From 0cfcd3750590f151769f63005b8169acd5742212 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:18:35 +0000 Subject: [PATCH 09/23] fastparquet=0.1.3 --- ci/requirements-2.7.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index e3bd5e46026c5..81a45eaca8620 100644 --- a/ci/requirements-2.7.sh +++ 
b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet=0.1.3 From 0c4a6d7c9ead7c7bdba410bb326d7d73b5afb574 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:45:28 +0000 Subject: [PATCH 10/23] other api change --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4dde76dee46a5..47b0e467beea1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -312,6 +312,7 @@ Other API Changes - :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) - Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- Compatibility in :func:`pandas.date_range` with fastparquet==0.1.4 which now supports timezone e.g. ``tz``='US/Eastern'`` (:issue:`19431`) .. 
_whatsnew_0230.deprecations: From 6ce68cfe1d2384b9ceeb25bcafc2856a9e2aa1f3 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:47:19 +0000 Subject: [PATCH 11/23] fix typo --- pandas/tests/io/test_parquet.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index bf51e7b306aeb..a7551923853cd 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -80,15 +80,6 @@ def fp_lt_014(): return 'fastparquet' -@pytest.fixture -def fp_ge_014(): - if not _HAVE_FASTPARQUET: - pytest.skip("fastparquet is not installed") - if LooseVersion(fastparquet.__version__) < LooseVersion('0.1.4'): - pytest.skip("fastparquet is < 0.1.4") - return 'fastparquet' - - @pytest.fixture def df_compat(): return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) @@ -467,7 +458,7 @@ def test_basic(self, fp, df_full): df = df_full # additional supported types for fastparquet>=0.1.4 - if LooseVersion(pyarrow.__version__) >= LooseVersion('0.1.4'): + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): df['datetime_tz'] = pd.date_range('20130101', periods=3, tz='US/Eastern') df['timedelta'] = pd.timedelta_range('1 day', periods=3) From f414743a67f359b9f49cdc2d989f5d23646638b2 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:56:40 +0000 Subject: [PATCH 12/23] deprecation warning for dtype in make_block_same_class. --- pandas/core/internals.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 270f9e71b3ae9..cc1854f4cd5e4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -227,6 +227,10 @@ def make_block_scalar(self, values): def make_block_same_class(self, values, placement=None, ndim=None, dtype=None): """ Wrap given values in a block of same type as self. 
""" + if dtype is not None: + # issue 19431 fastparquet is passing this + warnings.warn("dtype argument is deprecated, will be removed " + "in a future release.", DeprecationWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, From 0d76fe7ac1678f24b8faa9e3b5fea099d60555c1 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 22:57:45 +0000 Subject: [PATCH 13/23] Future warning for dtype in make_block_same_class. --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index cc1854f4cd5e4..218e3b947d32c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -230,7 +230,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, if dtype is not None: # issue 19431 fastparquet is passing this warnings.warn("dtype argument is deprecated, will be removed " - "in a future release.", DeprecationWarning) + "in a future release.", FutureWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, From bb95dc61dc8792db0017a832c41c95626f5b5866 Mon Sep 17 00:00:00 2001 From: minggli Date: Sun, 28 Jan 2018 23:29:42 +0000 Subject: [PATCH 14/23] update notes as fastparquet nows supports timezone --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ae04996b4fddf..5b2204bf4e4df 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4537,7 +4537,7 @@ See the documentation for `pyarrow `__ and .. note:: These engines are very similar and should read/write nearly identical parquet format files. - Currently ``pyarrow`` does not support timedelta data, and ``fastparquet`` does not support timezone aware datetimes (they are coerced to UTC). + Currently ``pyarrow`` does not support timedelta data, ``fastparquet>=0.1.4`` now supports timezone aware datetimes. 
These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). .. ipython:: python From d9a2e2a8a1d2ae98c4c6f2d8e593d66b2231b9e0 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 00:16:45 +0000 Subject: [PATCH 15/23] remove fastparquet pin --- ci/requirements-2.7.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 81a45eaca8620..e3bd5e46026c5 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet=0.1.3 +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet From 97b17e9b82a587a3e0958f2b80b95d1102097cdf Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 00:22:34 +0000 Subject: [PATCH 16/23] remove other api change as it is internal --- doc/source/whatsnew/v0.23.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 47b0e467beea1..4dde76dee46a5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -312,7 +312,6 @@ Other API Changes - :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) - Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) -- Compatibility in :func:`pandas.date_range` with fastparquet==0.1.4 which now supports timezone e.g. 
``tz``='US/Eastern'`` (:issue:`19431`) .. _whatsnew_0230.deprecations: From 800b74164d988f6761fa55f0c139e78fbd249458 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 10:21:06 +0000 Subject: [PATCH 17/23] remove version --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5b2204bf4e4df..fd174ac0da217 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4537,7 +4537,7 @@ See the documentation for `pyarrow `__ and .. note:: These engines are very similar and should read/write nearly identical parquet format files. - Currently ``pyarrow`` does not support timedelta data, ``fastparquet>=0.1.4`` now supports timezone aware datetimes. + Currently ``pyarrow`` does not support timedelta data, ``fastparquet`` now supports timezone aware datetimes. These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). .. ipython:: python From 52220b9f23691cb29493cde3d43e8597aec8be1f Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 10:21:50 +0000 Subject: [PATCH 18/23] remove dtype for make_block and DeprecationWarning on make_block_same_class --- pandas/core/internals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 218e3b947d32c..f3e5e4c99a899 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -206,7 +206,7 @@ def array_dtype(self): """ return self.dtype - def make_block(self, values, placement=None, ndim=None, dtype=None): + def make_block(self, values, placement=None, ndim=None): """ Create a new block, with type inference propagate any values that are not specified @@ -216,7 +216,7 @@ def make_block(self, values, placement=None, ndim=None, dtype=None): if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, dtype=dtype) + return make_block(values, 
placement=placement, ndim=ndim) def make_block_scalar(self, values): """ @@ -230,7 +230,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, if dtype is not None: # issue 19431 fastparquet is passing this warnings.warn("dtype argument is deprecated, will be removed " - "in a future release.", FutureWarning) + "in a future release.", DeprecationWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, From c602a76a95abcb16f8c1fb03ad009b9d11d39df4 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 10:22:45 +0000 Subject: [PATCH 19/23] FutureWarning on make_block_same_class --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f3e5e4c99a899..ec884035fe0c4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -230,7 +230,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, if dtype is not None: # issue 19431 fastparquet is passing this warnings.warn("dtype argument is deprecated, will be removed " - "in a future release.", DeprecationWarning) + "in a future release.", FutureWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, From ddbbde397e1d6800e0116e6007a744ead470bb96 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 10:56:30 +0000 Subject: [PATCH 20/23] test case for dtype and warning generation --- pandas/tests/internals/test_internals.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 57884e9816ed3..baf762342a33b 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -285,6 +285,14 @@ def test_delete(self): with pytest.raises(Exception): newb.delete(3) + def test_make_block_same_class(self): + block = create_block('M8[ns, 
US/Eastern]', [3]) + with pytest.warns(FutureWarning): + copy = block.make_block_same_class(block.values, + dtype=block.values.dtype) + assert block.dtype == copy.dtype + assert block.__class__ == copy.__class__ + class TestDatetimeBlock(object): From 6e6b5f01c752d4ed7596ecd0214a2dc433167f56 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 11:14:10 +0000 Subject: [PATCH 21/23] issue number and simplify test --- pandas/tests/internals/test_internals.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index baf762342a33b..fba8bbddcca3b 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -286,12 +286,10 @@ def test_delete(self): newb.delete(3) def test_make_block_same_class(self): + # issue 19431 block = create_block('M8[ns, US/Eastern]', [3]) with pytest.warns(FutureWarning): - copy = block.make_block_same_class(block.values, - dtype=block.values.dtype) - assert block.dtype == copy.dtype - assert block.__class__ == copy.__class__ + block.make_block_same_class(block.values, dtype=block.values.dtype) class TestDatetimeBlock(object): From 326394fdf6e326385ce1a9709552913f1b60001c Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 11:23:11 +0000 Subject: [PATCH 22/23] misc doc --- doc/source/io.rst | 2 +- pandas/tests/io/test_parquet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index fd174ac0da217..4199f161501ec 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4537,7 +4537,7 @@ See the documentation for `pyarrow `__ and .. note:: These engines are very similar and should read/write nearly identical parquet format files. - Currently ``pyarrow`` does not support timedelta data, ``fastparquet`` now supports timezone aware datetimes. 
+ Currently ``pyarrow`` does not support timedelta data, ``fastparquet>=0.1.4`` supports timezone aware datetimes. These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). .. ipython:: python diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a7551923853cd..244b6f4244252 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -457,7 +457,7 @@ class TestParquetFastParquet(Base): def test_basic(self, fp, df_full): df = df_full - # additional supported types for fastparquet>=0.1.4 + # additional supported types for fastparquet if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): df['datetime_tz'] = pd.date_range('20130101', periods=3, tz='US/Eastern') From 77422ba2bdee149b092e39acff1077875a9e9790 Mon Sep 17 00:00:00 2001 From: minggli Date: Mon, 29 Jan 2018 11:35:23 +0000 Subject: [PATCH 23/23] use pandas warning assert --- pandas/tests/internals/test_internals.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fba8bbddcca3b..f17306b8b52f9 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -288,7 +288,8 @@ def test_delete(self): def test_make_block_same_class(self): # issue 19431 block = create_block('M8[ns, US/Eastern]', [3]) - with pytest.warns(FutureWarning): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): block.make_block_same_class(block.values, dtype=block.values.dtype)