From 5822ad92a950972d8368349f3ee336ee2f1e6d7d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 09:04:16 -0700 Subject: [PATCH 1/7] move tests specific to tslibs.parsing --- pandas/tests/indexes/datetimes/test_tools.py | 134 +----------------- pandas/tests/scalar/test_parsing.py | 137 +++++++++++++++++++ 2 files changed, 144 insertions(+), 127 deletions(-) create mode 100644 pandas/tests/scalar/test_parsing.py diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 330ec9f357655..8205b4fde217b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -897,69 +897,6 @@ def test_dayfirst(self): class TestGuessDatetimeFormat(object): - - def test_guess_datetime_format_with_parseable_formats(self): - tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - assert tools._guess_datetime_format(dt_string) == dt_format - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - assert tools._guess_datetime_format( - ambiguous_string, dayfirst=True) == '%d/%m/%Y' - assert tools._guess_datetime_format( - ambiguous_string, dayfirst=False) == '%m/%d/%Y' - - def test_guess_datetime_format_with_locale_specific_formats(self): - # The month names will vary depending on the locale, in which - # case these wont be parsed properly (dateutil can't parse them) - tm._skip_if_has_locale() - - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) - - for dt_string, dt_format in dt_string_to_format: - assert tools._guess_datetime_format(dt_string) == dt_format - - def test_guess_datetime_format_invalid_inputs(self): - # A datetime string must include a year, month and a day for it - # to be guessable, in addition to being a string that looks like - # a datetime - invalid_dts = [ - '2013', - '01/2013', - '12:00:00', - '1/1/1/1', - 'this_is_not_a_datetime', - '51a', - 9, - datetime(2011, 1, 1), - ] - - for invalid_dt in invalid_dts: - assert tools._guess_datetime_format(invalid_dt) is None - - def test_guess_datetime_format_nopadding(self): - # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - assert tools._guess_datetime_format(dt_string) == dt_format - def test_guess_datetime_format_for_array(self): tm._skip_if_not_us_locale() expected_format = '%Y-%m-%d %H:%M:%S.%f' @@ -1074,21 +1011,6 @@ def test_day_not_in_month_ignore(self): class TestDatetimeParsingWrappers(object): - def test_does_not_convert_mixed_integer(self): - bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') - - for bad_date_string in bad_date_strings: - assert not parsing._does_string_look_like_datetime(bad_date_string) - - good_date_strings = ('2012-01-01', - '01/01/2012', - 'Mon Sep 16, 2013', - '01012012', - '0101', - '1-1', ) - - for good_date_string in good_date_strings: - assert parsing._does_string_look_like_datetime(good_date_string) def test_parsers(self): @@ -1148,8 +1070,8 @@ def test_parsers(self): } for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, - yearfirst=yearfirst) + result1, _, _ = parsing.parse_time_string(date_str, + yearfirst=yearfirst) result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below @@ -1175,7 +1097,7 @@ def test_parsers(self): assert result7 == expected # NaT - result1, _, _ = tools.parse_time_string('NaT') + result1, _, _ = parsing.parse_time_string('NaT') result2 = to_datetime('NaT') result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] @@ -1184,12 +1106,6 @@ def test_parsers(self): assert result3 is tslib.NaT assert result4 is tslib.NaT - def test_parsers_quarter_invalid(self): - - cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] - for case in cases: - pytest.raises(ValueError, tools.parse_time_string, case) - def test_parsers_dayfirst_yearfirst(self): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 @@ -1264,9 +1180,9 @@ def test_parsers_dayfirst_yearfirst(self): yearfirst=yearfirst) assert dateutil_result == expected - result1, _, _ = tools.parse_time_string(date_str, - dayfirst=dayfirst, - yearfirst=yearfirst) + result1, _, _ = parsing.parse_time_string(date_str, + dayfirst=dayfirst, + yearfirst=yearfirst) # we don't support dayfirst/yearfirst here: if not dayfirst and not yearfirst: @@ -1289,7 +1205,7 @@ def test_parsers_timestring(self): '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} for date_str, (exp_now, exp_def) in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str) + result1, _, _ = parsing.parse_time_string(date_str) result2 = to_datetime(date_str) result3 = to_datetime([date_str]) result4 = Timestamp(date_str) @@ -1338,34 +1254,6 @@ def test_parsers_time(self): assert isinstance(res, list) assert res == expected_arr - def test_parsers_monthfreq(self): - cases = {'201101': datetime(2011, 1, 1, 0, 0), - '200005': datetime(2000, 5, 1, 0, 0)} - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, freq='M') - assert result1 == expected - - def test_parsers_quarterly_with_freq(self): - msg = ('Incorrect quarterly string is given, quarter ' - 'must be between 1 and 4: 2013Q5') - with tm.assert_raises_regex(parsing.DateParseError, msg): - tools.parse_time_string('2013Q5') - - # GH 5418 - msg = ('Unable to retrieve month information from given freq: ' - 'INVLD-L-DEC-SAT') - with tm.assert_raises_regex(parsing.DateParseError, msg): - tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') - - cases = {('2013Q2', None): datetime(2013, 4, 1), - ('2013Q2', 'A-APR'): datetime(2012, 8, 1), - ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} - - for (date_str, freq), exp in compat.iteritems(cases): - result, _, _ = tools.parse_time_string(date_str, freq=freq) - assert result == exp - def test_parsers_timezone_minute_offsets_roundtrip(self): # GH11708 base = to_datetime("2013-01-01 00:00:00") @@ -1423,14 +1311,6 @@ def test_parsers_iso8601(self): class TestArrayToDatetime(object): - - def test_try_parse_dates(self): - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = parsing.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - assert np.array_equal(result, expected) - def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) tm.assert_numpy_array_equal( diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py new file mode 100644 index 0000000000000..3c63cc532ec66 --- /dev/null +++ b/pandas/tests/scalar/test_parsing.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +""" +Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx +""" +from datetime import datetime + +import numpy as np + +from pandas import compat +from pandas.util import testing as tm + +from pandas._libs.tslibs import parsing + + +class TestDatetimeParsingWrappers(object): + def test_does_not_convert_mixed_integer(self): + bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') + + for bad_date_string in bad_date_strings: + assert not parsing._does_string_look_like_datetime(bad_date_string) + + good_date_strings = ('2012-01-01', + '01/01/2012', + 'Mon Sep 16, 2013', + '01012012', + '0101', + '1-1', ) + + for good_date_string in good_date_strings: + assert parsing._does_string_look_like_datetime(good_date_string) + + def test_parsers_quarterly_with_freq(self): + msg = ('Incorrect quarterly string is given, quarter ' + 'must be between 1 and 4: 2013Q5') + with tm.assert_raises_regex(parsing.DateParseError, msg): + parsing.parse_time_string('2013Q5') + + # GH 5418 + msg = ('Unable to retrieve month information from given freq: ' + 'INVLD-L-DEC-SAT') + with tm.assert_raises_regex(parsing.DateParseError, msg): + parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') + + cases = {('2013Q2', None): datetime(2013, 4, 1), + ('2013Q2', 'A-APR'): datetime(2012, 8, 1), + ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} + + for (date_str, freq), exp in compat.iteritems(cases): + result, _, _ = parsing.parse_time_string(date_str, freq=freq) + assert result == exp + + def test_parsers_quarter_invalid(self): + + cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] + for case in cases: + pytest.raises(ValueError, parsing.parse_time_string, case) + + def test_parsers_monthfreq(self): + cases = {'201101': datetime(2011, 1, 1, 0, 0), + '200005': datetime(2000, 5, 1, 0, 0)} + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = parsing.parse_time_string(date_str, freq='M') + assert result1 == expected + + +class TestGuessDatetimeFormat(object): + def test_guess_datetime_format_with_parseable_formats(self): + tm._skip_if_not_us_locale() + dt_string_to_format = (('20111230', '%Y%m%d'), + ('2011-12-30', '%Y-%m-%d'), + ('30-12-2011', '%d-%m-%Y'), + ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), + ('2011-12-30 00:00:00.000000', + '%Y-%m-%d %H:%M:%S.%f'), ) + + for dt_string, dt_format in dt_string_to_format: + assert parsing._guess_datetime_format(dt_string) == dt_format + + def test_guess_datetime_format_with_dayfirst(self): + ambiguous_string = '01/01/2011' + assert parsing._guess_datetime_format( + ambiguous_string, dayfirst=True) == '%d/%m/%Y' + assert parsing._guess_datetime_format( + ambiguous_string, dayfirst=False) == '%m/%d/%Y' + + def test_guess_datetime_format_with_locale_specific_formats(self): + # The month names will vary depending on the locale, in which + # case these wont be parsed properly (dateutil can't parse them) + tm._skip_if_has_locale() + + dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), + ('30/December/2011', '%d/%B/%Y'), + ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) + + for dt_string, dt_format in dt_string_to_format: + assert parsing._guess_datetime_format(dt_string) == dt_format + + def test_guess_datetime_format_invalid_inputs(self): + # A datetime string must include a year, month and a day for it + # to be guessable, in addition to being a string that looks like + # a datetime + invalid_dts = [ + '2013', + '01/2013', + '12:00:00', + '1/1/1/1', + 'this_is_not_a_datetime', + '51a', + 9, + datetime(2011, 1, 1), + ] + + for invalid_dt in invalid_dts: + assert parsing._guess_datetime_format(invalid_dt) is None + + def test_guess_datetime_format_nopadding(self): + # GH 11142 + dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), + ('30-1-2011', '%d-%m-%Y'), + ('1/1/2011', '%m/%d/%Y'), + ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), + ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) + + for dt_string, dt_format in dt_string_to_format: + assert parsing._guess_datetime_format(dt_string) == dt_format + +class TestArrayToDatetime(object): + + def test_try_parse_dates(self): + arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) + + result = parsing.try_parse_dates(arr, dayfirst=True) + expected = [parse(d, dayfirst=True) for d in arr] + assert np.array_equal(result, expected) From de7d8deea6d785fbc37508dc7f6d05ceb76058f4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 09:10:45 -0700 Subject: [PATCH 2/7] whitespace cleanup --- pandas/tests/scalar/test_parsing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py index 3c63cc532ec66..bafb12b9c2351 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/scalar/test_parsing.py @@ -5,6 +5,8 @@ from datetime import datetime import numpy as np +import pytest +from dateutil.parser import parse from pandas import compat from pandas.util import testing as tm @@ -24,7 +26,7 @@ def test_does_not_convert_mixed_integer(self): 'Mon Sep 16, 2013', '01012012', '0101', - '1-1', ) + '1-1') for good_date_string in good_date_strings: assert parsing._does_string_look_like_datetime(good_date_string) @@ -127,8 +129,8 @@ def test_guess_datetime_format_nopadding(self): for dt_string, dt_format in dt_string_to_format: assert parsing._guess_datetime_format(dt_string) == dt_format -class TestArrayToDatetime(object): +class TestArrayToDatetime(object): def test_try_parse_dates(self): arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) From a99cd6f644268286960480280e3a1e8ab9ccc0d7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 10:51:42 -0700 Subject: [PATCH 3/7] edit per reviewer suggestion --- pandas/tests/scalar/test_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py index bafb12b9c2351..c27f6d83c7aa6 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/scalar/test_parsing.py @@ -136,4 +136,4 @@ def test_try_parse_dates(self): result = parsing.try_parse_dates(arr, dayfirst=True) expected = [parse(d, dayfirst=True) for d in arr] - assert np.array_equal(result, expected) + assert tm.assert_numpy_array_equal(result, expected) From 7965cab3e5a0b54731baa42091b80a697bbf26eb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 11:00:51 -0700 Subject: [PATCH 4/7] fixup remove extra assert --- pandas/tests/scalar/test_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py index c27f6d83c7aa6..83fcd9826ee1e 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/scalar/test_parsing.py @@ -136,4 +136,4 @@ def test_try_parse_dates(self): result = parsing.try_parse_dates(arr, dayfirst=True) expected = [parse(d, dayfirst=True) for d in arr] - assert tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) From 215bc065d6cc290fb71925d63778d97b1bad31af Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 13:47:56 -0700 Subject: [PATCH 5/7] Fix test broken by change to tm.assert_numpy_array_equal --- pandas/tests/scalar/test_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py index 83fcd9826ee1e..6908fecbd4e05 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/scalar/test_parsing.py @@ -135,5 +135,5 @@ def test_try_parse_dates(self): arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) result = parsing.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] + expected = np.array([parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected) From a78600afbe389e43d1649442fa6c46307469be63 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 15:53:55 -0700 Subject: [PATCH 6/7] remove test class duplicated in test_misc --- pandas/tests/indexes/datetimes/test_ops.py | 46 ---------------------- 1 file changed, 46 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 424ef7fc3caf7..82719c92d2dc4 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -952,52 +952,6 @@ def test_equals(self): assert not idx.equals(pd.Series(idx3)) -class TestDateTimeIndexToJulianDate(object): - - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='H').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - # GH 10699 @pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], [tm.assert_series_equal, From beca092d4f4215bcb2b1f230479af19333bc68c5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 17:00:54 -0700 Subject: [PATCH 7/7] flake8 fixup --- pandas/tests/indexes/datetimes/test_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 82719c92d2dc4..b65d467dbd4b8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -3,7 +3,7 @@ import dateutil import warnings import numpy as np -from datetime import timedelta +from datetime import timedelta, datetime from itertools import product import pandas as pd @@ -12,7 +12,7 @@ from pandas.errors import PerformanceWarning from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index, offsets, bdate_range) + bdate_range) from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops @@ -1077,7 +1077,7 @@ def test_shift_months(years, months): Timestamp('2000-12-31')]) actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + months)) - expected = DatetimeIndex([x + offsets.DateOffset( + expected = DatetimeIndex([x + pd.offsets.DateOffset( years=years, months=months) for x in s]) tm.assert_index_equal(actual, expected)