From ed3afc4afac1e46f1da8e24ca7848f2420b8ef9e Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 3 Feb 2018 20:30:29 +0000 Subject: [PATCH] ERR: Better error msg when merging on tz-aware and tz-naive columns (#19525) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/reshape/merge.py | 27 +++++++++--------------- pandas/tests/reshape/merge/test_merge.py | 4 +++- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 26a7a78bb5c55..69965f44d87a8 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -342,6 +342,7 @@ Other API Changes - Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 99ea2c4fe4688..3ec78ce52c6e5 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -940,6 +940,11 @@ def _maybe_coerce_merge_keys(self): elif is_dtype_equal(lk.dtype, rk.dtype): continue + msg = ("You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. If you wish to proceed " + "you should use pd.concat".format(lk_dtype=lk.dtype, + rk_dtype=rk.dtype)) + # if we are numeric, then allow differing # kinds to proceed, eg. int64 and int8, int and float # further if we are object, but we infer to @@ -968,30 +973,18 @@ def _maybe_coerce_merge_keys(self): pass # Check if we are trying to merge on obviously - # incompatible dtypes GH 9780 + # incompatible dtypes GH 9780, GH 15800 elif is_numeric_dtype(lk) and not is_numeric_dtype(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif not is_numeric_dtype(lk) and is_numeric_dtype(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif is_datetimelike(lk) and not is_datetimelike(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif not is_datetimelike(lk) and is_datetimelike(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) + raise ValueError(msg) + elif is_datetime64tz_dtype(lk) and not is_datetime64tz_dtype(rk): + raise ValueError(msg) + elif not is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): raise ValueError(msg) # Houston, we have a problem! diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a8319339c6435..f63c206c0c407 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1512,11 +1512,13 @@ def test_merge_on_ints_floats_warning(self): '2011-01-02']), (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]), (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]), + (pd.date_range('20130101', periods=3), + pd.date_range('20130101', periods=3, tz='US/Eastern')), ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), ]) def test_merge_incompat_dtypes(self, df1_vals, df2_vals): - # GH 9780 + # GH 9780, GH 15800 # Raise a ValueError when a user tries to merge on # dtypes that are incompatible (e.g., obj and int/float)