diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7cd880b51661d..afb6653b1e694 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -62,9 +62,9 @@ def generate_index_types(self, skip_index_keys=[]): Return a generator of the various index types, leaving out the ones with a key in skip_index_keys """ - for key, idx in self.indices.items(): + for key, index in self.indices.items(): if key not in skip_index_keys: - yield key, idx + yield key, index def test_new_axis(self): new_index = self.dateIndex[None, :] @@ -80,8 +80,8 @@ def test_copy_and_deepcopy(self, indices): @pytest.mark.parametrize("attr", ['strIndex', 'dateIndex']) def test_constructor_regular(self, attr): # regular instance creation - idx = getattr(self, attr) - tm.assert_contains_all(idx, idx) + index = getattr(self, attr) + tm.assert_contains_all(index, index) def test_constructor_casting(self): # casting @@ -108,14 +108,14 @@ def test_constructor_corner(self): # corner case pytest.raises(TypeError, Index, 0) - @pytest.mark.parametrize("idx_vals", [ + @pytest.mark.parametrize("index_vals", [ [('A', 1), 'B'], ['B', ('A', 1)]]) - def test_construction_list_mixed_tuples(self, idx_vals): + def test_construction_list_mixed_tuples(self, index_vals): # see gh-10697: if we are constructing from a mixed list of tuples, # make sure that we are independent of the sorting order. 
- idx = Index(idx_vals) - assert isinstance(idx, Index) - assert not isinstance(idx, MultiIndex) + index = Index(index_vals) + assert isinstance(index, Index) + assert not isinstance(index, MultiIndex) @pytest.mark.parametrize('na_value', [None, np.nan]) @pytest.mark.parametrize('vtype', [list, tuple, iter]) @@ -127,36 +127,36 @@ def test_construction_list_tuples_nan(self, na_value, vtype): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("cast_as_obj", [True, False]) - @pytest.mark.parametrize("idx", [ + @pytest.mark.parametrize("index", [ pd.date_range('2015-01-01 10:00', freq='D', periods=3, tz='US/Eastern'), # DTI with tz pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz pd.timedelta_range('1 days', freq='D', periods=3), # td pd.period_range('2015-01-01', freq='D', periods=3) # period ]) - def test_constructor_from_index_dtlike(self, cast_as_obj, idx): + def test_constructor_from_index_dtlike(self, cast_as_obj, index): if cast_as_obj: - result = pd.Index(idx.astype(object)) + result = pd.Index(index.astype(object)) else: - result = pd.Index(idx) + result = pd.Index(index) - tm.assert_index_equal(result, idx) + tm.assert_index_equal(result, index) - if isinstance(idx, pd.DatetimeIndex) and hasattr(idx, 'tz'): - assert result.tz == idx.tz + if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'): + assert result.tz == index.tz - @pytest.mark.parametrize("idx,has_tz", [ + @pytest.mark.parametrize("index,has_tz", [ (pd.date_range('2015-01-01 10:00', freq='D', periods=3, tz='US/Eastern'), True), # datetimetz (pd.timedelta_range('1 days', freq='D', periods=3), False), # td (pd.period_range('2015-01-01', freq='D', periods=3), False) # period ]) - def test_constructor_from_series_dtlike(self, idx, has_tz): - result = pd.Index(pd.Series(idx)) - tm.assert_index_equal(result, idx) + def test_constructor_from_series_dtlike(self, index, has_tz): + result = pd.Index(pd.Series(index)) + tm.assert_index_equal(result, index) if has_tz: 
- assert result.tz == idx.tz + assert result.tz == index.tz @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) def test_constructor_from_series(self, klass): @@ -187,19 +187,19 @@ def test_constructor_from_frame_series_freq(self): df = pd.DataFrame(np.random.rand(5, 3)) df['date'] = dts result = DatetimeIndex(df['date'], freq='MS') - assert df['date'].dtype == object + assert df['date'].dtype == object expected.name = 'date' - exp = pd.Series(dts, name='date') - tm.assert_series_equal(df['date'], exp) + tm.assert_index_equal(result, expected) + + expected = pd.Series(dts, name='date') + tm.assert_series_equal(df['date'], expected) # GH 6274 # infer freq of same freq = pd.infer_freq(df['date']) assert freq == 'MS' - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("array", [ np.arange(5), np.array(['a', 'b', 'c']), date_range( '2000-01-01', periods=3).values @@ -255,14 +255,14 @@ def test_constructor_int_dtype_nan_raises(self, dtype): def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): # GH 13467 na_list = [na_val, na_val] - exp = klass(na_list) - assert exp.dtype == dtype + expected = klass(na_list) + assert expected.dtype == dtype result = Index(na_list) - tm.assert_index_equal(result, exp) + tm.assert_index_equal(result, expected) result = Index(np.array(na_list)) - tm.assert_index_equal(result, exp) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("pos", [0, 1]) @pytest.mark.parametrize("klass,dtype,ctor", [ @@ -271,16 +271,16 @@ def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): ]) def test_index_ctor_infer_nat_dt_like(self, pos, klass, dtype, ctor, nulls_fixture): - exp = klass([pd.NaT, pd.NaT]) - assert exp.dtype == dtype + expected = klass([pd.NaT, pd.NaT]) + assert expected.dtype == dtype data = [ctor] data.insert(pos, nulls_fixture) result = Index(data) - tm.assert_index_equal(result, exp) + tm.assert_index_equal(result, expected) result = Index(np.array(data, dtype=object)) - 
tm.assert_index_equal(result, exp) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("swap_objs", [True, False]) def test_index_ctor_nat_result(self, swap_objs): @@ -289,9 +289,9 @@ def test_index_ctor_nat_result(self, swap_objs): if swap_objs: data = data[::-1] - exp = pd.Index(data, dtype=object) - tm.assert_index_equal(Index(data), exp) - tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) + expected = pd.Index(data, dtype=object) + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) @@ -304,9 +304,9 @@ def test_index_ctor_infer_periodindex(self): (['A', 'B', 'C', np.nan], 'obj') ]) def test_constructor_simple_new(self, vals, dtype): - idx = Index(vals, name=dtype) - result = idx._simple_new(idx, dtype) - tm.assert_index_equal(result, idx) + index = Index(vals, name=dtype) + result = index._simple_new(index, dtype) + tm.assert_index_equal(result, index) @pytest.mark.parametrize("vals", [ [1, 2, 3], np.array([1, 2, 3]), np.array([1, 2, 3], dtype=int), @@ -314,29 +314,29 @@ def test_constructor_simple_new(self, vals, dtype): [1., 2., 3.], np.array([1., 2., 3.], dtype=float) ]) def test_constructor_dtypes_to_int64(self, vals): - idx = Index(vals, dtype=int) - assert isinstance(idx, Int64Index) + index = Index(vals, dtype=int) + assert isinstance(index, Int64Index) @pytest.mark.parametrize("vals", [ [1, 2, 3], [1., 2., 3.], np.array([1., 2., 3.]), np.array([1, 2, 3], dtype=int), np.array([1., 2., 3.], dtype=float) ]) def test_constructor_dtypes_to_float64(self, vals): - idx = Index(vals, dtype=float) - assert isinstance(idx, Float64Index) + index = Index(vals, dtype=float) + assert isinstance(index, Float64Index) - @pytest.mark.parametrize("cast_idx", [True, False]) + @pytest.mark.parametrize("cast_index", [True, False]) @pytest.mark.parametrize("vals", [ [True, False, True], 
np.array([True, False, True], dtype=bool) ]) - def test_constructor_dtypes_to_object(self, cast_idx, vals): - if cast_idx: - idx = Index(vals, dtype=bool) + def test_constructor_dtypes_to_object(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=bool) else: - idx = Index(vals) + index = Index(vals) - assert isinstance(idx, Index) - assert idx.dtype == object + assert isinstance(index, Index) + assert index.dtype == object @pytest.mark.parametrize("vals", [ [1, 2, 3], np.array([1, 2, 3], dtype=int), @@ -345,38 +345,38 @@ def test_constructor_dtypes_to_object(self, cast_idx, vals): [datetime(2011, 1, 1), datetime(2011, 1, 2)] ]) def test_constructor_dtypes_to_categorical(self, vals): - idx = Index(vals, dtype='category') - assert isinstance(idx, CategoricalIndex) + index = Index(vals, dtype='category') + assert isinstance(index, CategoricalIndex) - @pytest.mark.parametrize("cast_idx", [True, False]) + @pytest.mark.parametrize("cast_index", [True, False]) @pytest.mark.parametrize("vals", [ Index(np.array([np_datetime64_compat('2011-01-01'), np_datetime64_compat('2011-01-02')])), Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]) ]) - def test_constructor_dtypes_to_datetime(self, cast_idx, vals): - if cast_idx: - idx = Index(vals, dtype=object) - assert isinstance(idx, Index) - assert idx.dtype == object + def test_constructor_dtypes_to_datetime(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object else: - idx = Index(vals) - assert isinstance(idx, DatetimeIndex) + index = Index(vals) + assert isinstance(index, DatetimeIndex) - @pytest.mark.parametrize("cast_idx", [True, False]) + @pytest.mark.parametrize("cast_index", [True, False]) @pytest.mark.parametrize("vals", [ np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), [timedelta(1), timedelta(1)] ]) - def test_constructor_dtypes_to_timedelta(self, cast_idx, vals): - if cast_idx: - idx = Index(vals, 
dtype=object) - assert isinstance(idx, Index) - assert idx.dtype == object + def test_constructor_dtypes_to_timedelta(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object else: - idx = Index(vals) - assert isinstance(idx, TimedeltaIndex) + index = Index(vals) + assert isinstance(index, TimedeltaIndex) @pytest.mark.parametrize("values", [ # pass values without timezone, as DatetimeIndex localizes it @@ -385,41 +385,41 @@ def test_constructor_dtypes_to_timedelta(self, cast_idx, vals): @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) def test_constructor_dtypes_datetime(self, tz_naive_fixture, values, klass): - idx = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) - dtype = idx.dtype + index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) + dtype = index.dtype - res = klass(values, tz=tz_naive_fixture) - tm.assert_index_equal(res, idx) + result = klass(values, tz=tz_naive_fixture) + tm.assert_index_equal(result, index) - res = klass(values, dtype=dtype) - tm.assert_index_equal(res, idx) + result = klass(values, dtype=dtype) + tm.assert_index_equal(result, index) - res = klass(list(values), tz=tz_naive_fixture) - tm.assert_index_equal(res, idx) + result = klass(list(values), tz=tz_naive_fixture) + tm.assert_index_equal(result, index) - res = klass(list(values), dtype=dtype) - tm.assert_index_equal(res, idx) + result = klass(list(values), dtype=dtype) + tm.assert_index_equal(result, index) @pytest.mark.parametrize("attr", ['values', 'asi8']) @pytest.mark.parametrize("klass", [pd.Index, pd.TimedeltaIndex]) def test_constructor_dtypes_timedelta(self, attr, klass): - idx = pd.timedelta_range('1 days', periods=5) - dtype = idx.dtype + index = pd.timedelta_range('1 days', periods=5) + dtype = index.dtype - values = getattr(idx, attr) + values = getattr(index, attr) - res = klass(values, dtype=dtype) - tm.assert_index_equal(res, idx) + result = 
klass(values, dtype=dtype) + tm.assert_index_equal(result, index) - res = klass(list(values), dtype=dtype) - tm.assert_index_equal(res, idx) + result = klass(list(values), dtype=dtype) + tm.assert_index_equal(result, index) def test_constructor_empty_gen(self): skip_index_keys = ["repeats", "periodIndex", "rangeIndex", "tuples"] - for key, idx in self.generate_index_types(skip_index_keys): - empty = idx.__class__([]) - assert isinstance(empty, idx.__class__) + for key, index in self.generate_index_types(skip_index_keys): + empty = index.__class__([]) + assert isinstance(empty, index.__class__) assert not len(empty) @pytest.mark.parametrize("empty,klass", [ @@ -515,21 +515,21 @@ def test_insert_missing(self, nulls_fixture): result = Index(list('abc')).insert(1, nulls_fixture) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("pos,exp", [ - (0, Index(['b', 'c', 'd'], name='idx')), - (-1, Index(['a', 'b', 'c'], name='idx')) + @pytest.mark.parametrize("pos,expected", [ + (0, Index(['b', 'c', 'd'], name='index')), + (-1, Index(['a', 'b', 'c'], name='index')) ]) - def test_delete(self, pos, exp): - idx = Index(['a', 'b', 'c', 'd'], name='idx') - result = idx.delete(pos) - tm.assert_index_equal(result, exp) - assert result.name == exp.name + def test_delete(self, pos, expected): + index = Index(['a', 'b', 'c', 'd'], name='index') + result = index.delete(pos) + tm.assert_index_equal(result, expected) + assert result.name == expected.name def test_delete_raises(self): - idx = Index(['a', 'b', 'c', 'd'], name='idx') + index = Index(['a', 'b', 'c', 'd'], name='index') with pytest.raises((IndexError, ValueError)): # either depending on numpy version - idx.delete(5) + index.delete(5) def test_identical(self): @@ -585,9 +585,9 @@ def test_asof(self): assert isinstance(self.dateIndex.asof(d), Timestamp) def test_asof_datetime_partial(self): - idx = pd.date_range('2010-01-01', periods=2, freq='m') + index = pd.date_range('2010-01-01', periods=2, freq='m') expected 
= Timestamp('2010-02-28') - result = idx.asof('2010-02') + result = index.asof('2010-02') assert result == expected assert not isinstance(result, Index) @@ -601,9 +601,9 @@ def test_nanosecond_index_access(self): # this does not yet work, as parsing strings is done via dateutil # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] - exp_ts = np_datetime64_compat('2013-01-01 00:00:00.000000050+0000', - 'ns') - assert first_value == x[Timestamp(exp_ts)] + expected_ts = np_datetime64_compat('2013-01-01 00:00:00.000000050+' + '0000', 'ns') + assert first_value == x[Timestamp(expected_ts)] @pytest.mark.parametrize("op", [ operator.eq, operator.ne, operator.gt, operator.lt, @@ -622,15 +622,15 @@ def test_comparators(self, op): tm.assert_numpy_array_equal(arr_result, index_result) def test_booleanindex(self): - boolIdx = np.repeat(True, len(self.strIndex)).astype(bool) - boolIdx[5:30:2] = False + boolIndex = np.repeat(True, len(self.strIndex)).astype(bool) + boolIndex[5:30:2] = False - subIndex = self.strIndex[boolIdx] + subIndex = self.strIndex[boolIndex] for i, val in enumerate(subIndex): assert subIndex.get_loc(val) == i - subIndex = self.strIndex[list(boolIdx)] + subIndex = self.strIndex[list(boolIndex)] for i, val in enumerate(subIndex): assert subIndex.get_loc(val) == i @@ -644,11 +644,11 @@ def test_fancy(self): @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) def test_empty_fancy(self, attr, dtype): empty_arr = np.array([], dtype=dtype) - idx = getattr(self, attr) - empty_idx = idx.__class__([]) + index = getattr(self, attr) + empty_index = index.__class__([]) - assert idx[[]].identical(empty_idx) - assert idx[empty_arr].identical(empty_idx) + assert index[[]].identical(empty_index) + assert index[empty_arr].identical(empty_index) @pytest.mark.parametrize("attr", [ 'strIndex', 'intIndex', 'floatIndex']) @@ -656,12 +656,12 @@ def test_empty_fancy_raises(self, attr): # pd.DatetimeIndex is excluded, because it overrides getitem and should # be tested 
separately. empty_farr = np.array([], dtype=np.float_) - idx = getattr(self, attr) - empty_idx = idx.__class__([]) + index = getattr(self, attr) + empty_index = index.__class__([]) - assert idx[[]].identical(empty_idx) + assert index[[]].identical(empty_index) # np.ndarray only accepts ndarray of int & bool dtypes, so should Index - pytest.raises(IndexError, idx.__getitem__, empty_farr) + pytest.raises(IndexError, index.__getitem__, empty_farr) @pytest.mark.parametrize("itm", [101, 'no_int']) def test_getitem_error(self, indices, itm): @@ -678,85 +678,90 @@ def test_intersection(self): inter = first.intersection(first) assert inter is first - idx1 = Index([1, 2, 3, 4, 5], name='idx') - # if target has the same name, it is preserved - idx2 = Index([3, 4, 5, 6, 7], name='idx') - expected2 = Index([3, 4, 5], name='idx') - result2 = idx1.intersection(idx2) - tm.assert_index_equal(result2, expected2) - assert result2.name == expected2.name - - # if target name is different, it will be reset - idx3 = Index([3, 4, 5, 6, 7], name='other') - expected3 = Index([3, 4, 5], name=None) - result3 = idx1.intersection(idx3) - tm.assert_index_equal(result3, expected3) - assert result3.name == expected3.name - - # non monotonic - idx1 = Index([5, 3, 2, 4, 1], name='idx') - idx2 = Index([4, 7, 6, 5, 3], name='idx') - expected = Index([5, 3, 4], name='idx') - result = idx1.intersection(idx2) - tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("index2,keeps_name", [ + (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name + (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names + (Index([3, 4, 5, 6, 7]), False)]) + def test_intersection_name_preservation(self, index2, keeps_name): + index1 = Index([1, 2, 3, 4, 5], name='index') + expected = Index([3, 4, 5]) + result = index1.intersection(index2) - idx2 = Index([4, 7, 6, 5, 3], name='other') - expected = Index([5, 3, 4], name=None) - result = idx1.intersection(idx2) - tm.assert_index_equal(result, 
expected) + if keeps_name: + expected.name = 'index' - # non-monotonic non-unique - idx1 = Index(['A', 'B', 'A', 'C']) - idx2 = Index(['B', 'D']) - expected = Index(['B'], dtype='object') - result = idx1.intersection(idx2) + assert result.name == expected.name tm.assert_index_equal(result, expected) - idx2 = Index(['B', 'D', 'A']) - expected = Index(['A', 'B', 'A'], dtype='object') - result = idx1.intersection(idx2) - tm.assert_index_equal(result, expected) - - # preserve names + @pytest.mark.parametrize("first_name,second_name,expected_name", [ + ('A', 'A', 'A'), ('A', 'B', None), (None, 'B', None)]) + def test_intersection_name_preservation2(self, first_name, second_name, + expected_name): first = self.strIndex[5:20] second = self.strIndex[:10] - first.name = 'A' - second.name = 'A' + first.name = first_name + second.name = second_name intersect = first.intersection(second) - assert intersect.name == 'A' + assert intersect.name == expected_name - second.name = 'B' - intersect = first.intersection(second) - assert intersect.name is None + @pytest.mark.parametrize("index2,keeps_name", [ + (Index([4, 7, 6, 5, 3], name='index'), True), + (Index([4, 7, 6, 5, 3], name='other'), False)]) + def test_intersection_monotonic(self, index2, keeps_name): + index1 = Index([5, 3, 2, 4, 1], name='index') + expected = Index([5, 3, 4]) - first.name = None - second.name = 'B' - intersect = first.intersection(second) - assert intersect.name is None + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index2,expected_arr", [ + (Index(['B', 'D']), ['B']), + (Index(['B', 'D', 'A']), ['A', 'B', 'A'])]) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr): + # non-monotonic non-unique + index1 = Index(['A', 'B', 'A', 'C']) + expected = Index(expected_arr, dtype='object') + result = index1.intersection(index2) + tm.assert_index_equal(result, expected) def 
test_intersect_str_dates(self): dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] i1 = Index(dt_dates, dtype=object) i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) + result = i2.intersection(i1) - assert len(res) == 0 + assert len(result) == 0 def test_union(self): + # TODO: Replace with fixtures first = self.strIndex[5:20] second = self.strIndex[:10] everything = self.strIndex[:20] + union = first.union(second) assert tm.equalContents(union, everything) + @pytest.mark.parametrize("klass", [ + np.array, Series, list]) + def test_union_from_iterables(self, klass): # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - assert tm.equalContents(result, everything) + # TODO: Replace with fixtures + first = self.strIndex[5:20] + second = self.strIndex[:10] + everything = self.strIndex[:20] + + case = klass(second.values) + result = first.union(case) + assert tm.equalContents(result, everything) + + def test_union_identity(self): + # TODO: replace with fixtures + first = self.strIndex[5:20] - # Corner cases union = first.union(first) assert union is first @@ -766,61 +771,22 @@ def test_union(self): union = Index([]).union(first) assert union is first - # preserve names - first = Index(list('ab'), name='A') - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index([], name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index([], name='A') - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name=None) - tm.assert_index_equal(union, expected) - - first = Index(list('ab')) - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name='B') - 
tm.assert_index_equal(union, expected) - - first = Index([]) - second = Index(list('ab'), name='B') - union = first.union(second) - expected = Index(list('ab'), name='B') - tm.assert_index_equal(union, expected) - - first = Index(list('ab')) - second = Index([], name='B') + @pytest.mark.parametrize("first_list", [list('ab'), list()]) + @pytest.mark.parametrize("second_list", [list('ab'), list()]) + @pytest.mark.parametrize("first_name, second_name, expected_name", [ + ('A', 'B', None), (None, 'B', 'B'), ('A', None, 'A')]) + def test_union_name_preservation(self, first_list, second_list, first_name, + second_name, expected_name): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) union = first.union(second) - expected = Index(list('ab'), name='B') - tm.assert_index_equal(union, expected) - first = Index(list('ab'), name='A') - second = Index(list('ab')) - union = first.union(second) - expected = Index(list('ab'), name='A') - tm.assert_index_equal(union, expected) - - first = Index(list('ab'), name='A') - second = Index([]) - union = first.union(second) - expected = Index(list('ab'), name='A') - tm.assert_index_equal(union, expected) - - first = Index([], name='A') - second = Index(list('ab')) - union = first.union(second) - expected = Index(list('ab'), name='A') + vals = sorted(set(first_list).union(second_list)) + expected = Index(vals, name=expected_name) tm.assert_index_equal(union, expected) + def test_union_dt_as_obj(self): + # TODO: Replace with fixtures with tm.assert_produces_warning(RuntimeWarning): firstCat = self.strIndex.union(self.dateIndex) secondCat = self.strIndex.union(self.strIndex) @@ -837,28 +803,29 @@ def test_union(self): tm.assert_contains_all(self.dateIndex, firstCat) def test_add(self): - idx = self.strIndex + index = self.strIndex expected = Index(self.strIndex.values * 2) - tm.assert_index_equal(idx + idx, expected) - tm.assert_index_equal(idx + idx.tolist(), expected) - 
tm.assert_index_equal(idx.tolist() + idx, expected) + tm.assert_index_equal(index + index, expected) + tm.assert_index_equal(index + index.tolist(), expected) + tm.assert_index_equal(index.tolist() + index, expected) # test add and radd - idx = Index(list('abc')) + index = Index(list('abc')) expected = Index(['a1', 'b1', 'c1']) - tm.assert_index_equal(idx + '1', expected) + tm.assert_index_equal(index + '1', expected) expected = Index(['1a', '1b', '1c']) - tm.assert_index_equal('1' + idx, expected) + tm.assert_index_equal('1' + index, expected) def test_sub(self): - idx = self.strIndex - pytest.raises(TypeError, lambda: idx - 'a') - pytest.raises(TypeError, lambda: idx - idx) - pytest.raises(TypeError, lambda: idx - idx.tolist()) - pytest.raises(TypeError, lambda: idx.tolist() - idx) + index = self.strIndex + pytest.raises(TypeError, lambda: index - 'a') + pytest.raises(TypeError, lambda: index - index) + pytest.raises(TypeError, lambda: index - index.tolist()) + pytest.raises(TypeError, lambda: index.tolist() - index) def test_map_identity_mapping(self): # GH 12766 + # TODO: replace with fixture for name, cur_index in self.indices.items(): tm.assert_index_equal(cur_index, cur_index.map(lambda x: x)) @@ -867,17 +834,18 @@ def test_map_with_tuples(self): # Test that returning a single tuple from an Index # returns an Index. - idx = tm.makeIntIndex(3) + index = tm.makeIntIndex(3) result = tm.makeIntIndex(3).map(lambda x: (x,)) - expected = Index([(i,) for i in idx]) + expected = Index([(i,) for i in index]) tm.assert_index_equal(result, expected) # Test that returning a tuple from a map of a single index # returns a MultiIndex object. 
- result = idx.map(lambda x: (x, x == 1)) - expected = MultiIndex.from_tuples([(i, i == 1) for i in idx]) + result = index.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in index]) tm.assert_index_equal(result, expected) + def test_map_with_tuples_mi(self): # Test that returning a single object from a MultiIndex # returns an Index. first_level = ['foo', 'bar', 'baz'] @@ -885,20 +853,18 @@ def test_map_with_tuples(self): reduced_index = multi_index.map(lambda x: x[0]) tm.assert_index_equal(reduced_index, Index(first_level)) - def test_map_tseries_indices_return_index(self): - date_index = tm.makeDateIndex(10) - exp = Index([1] * 10) - tm.assert_index_equal(exp, date_index.map(lambda x: 1)) - - period_index = tm.makePeriodIndex(10) - tm.assert_index_equal(exp, period_index.map(lambda x: 1)) - - tdelta_index = tm.makeTimedeltaIndex(10) - tm.assert_index_equal(exp, tdelta_index.map(lambda x: 1)) - + @pytest.mark.parametrize("attr", [ + 'makeDateIndex', 'makePeriodIndex', 'makeTimedeltaIndex']) + def test_map_tseries_indices_return_index(self, attr): + index = getattr(tm, attr)(10) + expected = Index([1] * 10) + result = index.map(lambda x: 1) + tm.assert_index_equal(expected, result) + + def test_map_tseries_indices_accsr_return_index(self): date_index = tm.makeDateIndex(24, freq='h', name='hourly') - exp = Index(range(24), name='hourly') - tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) + expected = Index(range(24), name='hourly') + tm.assert_index_equal(expected, date_index.map(lambda x: x.hour)) @pytest.mark.parametrize( "mapper", @@ -908,10 +874,11 @@ def test_map_tseries_indices_return_index(self): def test_map_dictlike(self, mapper): # GH 12756 expected = Index(['foo', 'bar', 'baz']) - idx = tm.makeIntIndex(3) - result = idx.map(mapper(expected.values, idx)) + index = tm.makeIntIndex(3) + result = index.map(mapper(expected.values, index)) tm.assert_index_equal(result, expected) + # TODO: replace with fixture for 
name in self.indices.keys(): if name == 'catIndex': # Tested in test_categorical @@ -930,29 +897,28 @@ def test_map_dictlike(self, mapper): result = index.map(mapper(expected, index)) tm.assert_index_equal(result, expected) - def test_map_with_non_function_missing_values(self): + @pytest.mark.parametrize("mapper", [ + Series(['foo', 2., 'baz'], index=[0, 2, -1]), + {0: 'foo', 2: 2.0, -1: 'baz'}]) + def test_map_with_non_function_missing_values(self, mapper): # GH 12756 expected = Index([2., np.nan, 'foo']) - input = Index([2, 1, 0]) - - mapper = Series(['foo', 2., 'baz'], index=[0, 2, -1]) - tm.assert_index_equal(expected, input.map(mapper)) + result = Index([2, 1, 0]).map(mapper) - mapper = {0: 'foo', 2: 2.0, -1: 'baz'} - tm.assert_index_equal(expected, input.map(mapper)) + tm.assert_index_equal(expected, result) def test_map_na_exclusion(self): - idx = Index([1.5, np.nan, 3, np.nan, 5]) + index = Index([1.5, np.nan, 3, np.nan, 5]) - result = idx.map(lambda x: x * 2, na_action='ignore') - exp = idx * 2 - tm.assert_index_equal(result, exp) + result = index.map(lambda x: x * 2, na_action='ignore') + expected = index * 2 + tm.assert_index_equal(result, expected) def test_map_defaultdict(self): - idx = Index([1, 2, 3]) + index = Index([1, 2, 3]) default_dict = defaultdict(lambda: 'blank') default_dict[1] = 'stuff' - result = idx.map(default_dict) + result = index.map(default_dict) expected = Index(['stuff', 'blank', 'blank']) tm.assert_index_equal(result, expected) @@ -967,18 +933,14 @@ def test_append_multiple(self): result = index.append([]) tm.assert_index_equal(result, index) - def test_append_empty_preserve_name(self): - left = Index([], name='foo') - right = Index([1, 2, 3], name='foo') - - result = left.append(right) - assert result.name == 'foo' - + @pytest.mark.parametrize("name,expected", [ + ('foo', 'foo'), ('bar', None)]) + def test_append_empty_preserve_name(self, name, expected): left = Index([], name='foo') - right = Index([1, 2, 3], name='bar') + right 
= Index([1, 2, 3], name=name) result = left.append(right) - assert result.name is None + assert result.name == expected def test_add_string(self): # from bug report @@ -996,78 +958,82 @@ def test_iadd_string(self): index += '_x' assert 'a_x' in index - def test_difference(self): - + @pytest.mark.parametrize("second_name,expected", [ + (None, None), ('name', 'name')]) + def test_difference_name_preservation(self, second_name, expected): + # TODO: replace with fixtures first = self.strIndex[5:20] second = self.strIndex[:10] answer = self.strIndex[10:20] + first.name = 'name' - # different names + second.name = second_name result = first.difference(second) assert tm.equalContents(result, answer) - assert result.name is None - # same names - second.name = 'name' - result = first.difference(second) - assert result.name == 'name' + if expected is None: + assert result.name is None + else: + assert result.name == expected - # with empty + def test_difference_empty_arg(self): + first = self.strIndex[5:20] + first.name == 'name' result = first.difference([]) + assert tm.equalContents(result, first) assert result.name == first.name - # with everything + def test_difference_identity(self): + first = self.strIndex[5:20] + first.name == 'name' result = first.difference(first) + assert len(result) == 0 assert result.name == first.name def test_symmetric_difference(self): # smoke - idx1 = Index([1, 2, 3, 4], name='idx1') - idx2 = Index([2, 3, 4, 5]) - result = idx1.symmetric_difference(idx2) + index1 = Index([1, 2, 3, 4], name='index1') + index2 = Index([2, 3, 4, 5]) + result = index1.symmetric_difference(index2) expected = Index([1, 5]) assert tm.equalContents(result, expected) assert result.name is None # __xor__ syntax - expected = idx1 ^ idx2 + expected = index1 ^ index2 assert tm.equalContents(result, expected) assert result.name is None - # multiIndex - idx1 = MultiIndex.from_tuples(self.tuples) - idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) - result = 
idx1.symmetric_difference(idx2) + def test_symmetric_difference_mi(self): + index1 = MultiIndex.from_tuples(self.tuples) + index2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) + result = index1.symmetric_difference(index2) expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) assert tm.equalContents(result, expected) - # nans: + @pytest.mark.parametrize("index2,expected", [ + (Index([0, 1, np.nan]), Index([0.0, 2.0, 3.0])), + (Index([0, 1]), Index([0.0, 2.0, 3.0, np.nan]))]) + def test_symmetric_difference_missing(self, index2, expected): # GH 13514 change: {nan} - {nan} == {} # (GH 6444, sorting of nans, is no longer an issue) - idx1 = Index([1, np.nan, 2, 3]) - idx2 = Index([0, 1, np.nan]) - idx3 = Index([0, 1]) + index1 = Index([1, np.nan, 2, 3]) - result = idx1.symmetric_difference(idx2) - expected = Index([0.0, 2.0, 3.0]) + result = index1.symmetric_difference(index2) tm.assert_index_equal(result, expected) - result = idx1.symmetric_difference(idx3) - expected = Index([0.0, 2.0, 3.0, np.nan]) - tm.assert_index_equal(result, expected) - - # other not an Index: - idx1 = Index([1, 2, 3, 4], name='idx1') - idx2 = np.array([2, 3, 4, 5]) + def test_symmetric_difference_non_index(self): + index1 = Index([1, 2, 3, 4], name='index1') + index2 = np.array([2, 3, 4, 5]) expected = Index([1, 5]) - result = idx1.symmetric_difference(idx2) + result = index1.symmetric_difference(index2) assert tm.equalContents(result, expected) - assert result.name == 'idx1' + assert result.name == 'index1' - result = idx1.symmetric_difference(idx2, result_name='new_name') + result = index1.symmetric_difference(index2, result_name='new_name') assert tm.equalContents(result, expected) assert result.name == 'new_name' @@ -1076,9 +1042,9 @@ def test_difference_type(self): # If taking difference of a set and itself, it # needs to preserve the type of the index skip_index_keys = ['repeats'] - for key, idx in self.generate_index_types(skip_index_keys): - result = 
idx.difference(idx) - expected = idx.drop(idx) + for key, index in self.generate_index_types(skip_index_keys): + result = index.difference(index) + expected = index.drop(index) tm.assert_index_equal(result, expected) def test_intersection_difference(self): @@ -1087,30 +1053,28 @@ def test_intersection_difference(self): # empty index produces the same index as the difference # of an index with itself. Test for all types skip_index_keys = ['repeats'] - for key, idx in self.generate_index_types(skip_index_keys): - inter = idx.intersection(idx.drop(idx)) - diff = idx.difference(idx) + for key, index in self.generate_index_types(skip_index_keys): + inter = index.intersection(index.drop(index)) + diff = index.difference(index) tm.assert_index_equal(inter, diff) - def test_is_numeric(self): - assert not self.dateIndex.is_numeric() - assert not self.strIndex.is_numeric() - assert self.intIndex.is_numeric() - assert self.floatIndex.is_numeric() - assert not self.catIndex.is_numeric() - - def test_is_object(self): - assert self.strIndex.is_object() - assert self.boolIndex.is_object() - assert not self.catIndex.is_object() - assert not self.intIndex.is_object() - assert not self.dateIndex.is_object() - assert not self.floatIndex.is_object() - - def test_is_all_dates(self): - assert self.dateIndex.is_all_dates - assert not self.strIndex.is_all_dates - assert not self.intIndex.is_all_dates + @pytest.mark.parametrize("attr,expected", [ + ('strIndex', False), ('boolIndex', False), ('catIndex', False), + ('intIndex', True), ('dateIndex', False), ('floatIndex', True)]) + def test_is_numeric(self, attr, expected): + assert getattr(self, attr).is_numeric() == expected + + @pytest.mark.parametrize("attr,expected", [ + ('strIndex', True), ('boolIndex', True), ('catIndex', False), + ('intIndex', False), ('dateIndex', False), ('floatIndex', False)]) + def test_is_object(self, attr, expected): + assert getattr(self, attr).is_object() == expected + + 
@pytest.mark.parametrize("attr,expected", [ + ('strIndex', False), ('boolIndex', False), ('catIndex', False), + ('intIndex', False), ('dateIndex', True), ('floatIndex', False)]) + def test_is_all_dates(self, attr, expected): + assert getattr(self, attr).is_all_dates == expected def test_summary(self): self._check_method_works(Index._summary) @@ -1142,19 +1106,21 @@ def test_format(self): expected = [str(index[0])] assert formatted == expected + self.strIndex[:0].format() + + @pytest.mark.parametrize("vals", [ + [1, 2.0 + 3.0j, 4.], ['a', 'b', 'c']]) + def test_format_missing(self, vals, nulls_fixture): # 2845 - index = Index([1, 2.0 + 3.0j, np.nan]) - formatted = index.format() - expected = [str(index[0]), str(index[1]), u('NaN')] - assert formatted == expected + vals = list(vals) # Copy for each iteration + vals.append(nulls_fixture) + index = Index(vals) - # is this really allowed? - index = Index([1, 2.0 + 3.0j, None]) formatted = index.format() - expected = [str(index[0]), str(index[1]), u('NaN')] - assert formatted == expected + expected = [str(index[0]), str(index[1]), str(index[2]), u('NaN')] - self.strIndex[:0].format() + assert formatted == expected + assert index[3] is nulls_fixture def test_format_with_name_time_info(self): # bug I fixed 12/20/2011 @@ -1172,19 +1138,13 @@ def test_format_datetime_with_time(self): assert len(result) == 2 assert result == expected - def test_format_none(self): - values = ['a', 'b', 'c', None] - - idx = Index(values) - idx.format() - assert idx[3] is None - - def test_logical_compat(self): - idx = self.create_index() - assert idx.all() == idx.values.all() - assert idx.any() == idx.values.any() + @pytest.mark.parametrize("op", ['any', 'all']) + def test_logical_compat(self, op): + index = self.create_index() + assert getattr(index, op)() == getattr(index.values, op)() def _check_method_works(self, method): + # TODO: make this a dedicated test with parametrized methods method(self.empty) method(self.dateIndex) 
method(self.unicodeIndex) @@ -1194,41 +1154,39 @@ def _check_method_works(self, method): method(self.catIndex) def test_get_indexer(self): - idx1 = Index([1, 2, 3, 4, 5]) - idx2 = Index([2, 4, 6]) - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) - r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + r1 = index1.get_indexer(index2) + e1 = np.array([1, 3, -1], dtype=np.intp) assert_almost_equal(r1, e1) - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("expected,method", [ + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), 'pad'), + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), 'ffill'), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), 'backfill'), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), 'bfill')]) + def test_get_indexer_methods(self, reverse, expected, method): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - assert_almost_equal(r1, e1) + if reverse: + index1 = index1[::-1] + expected = expected[::-1] - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) + result = index2.get_indexer(index1, method=method) + assert_almost_equal(result, expected) def test_get_indexer_invalid(self): # GH10411 - idx = Index(np.arange(10)) + index = Index(np.arange(10)) with tm.assert_raises_regex(ValueError, 'tolerance argument'): - idx.get_indexer([1, 0], tolerance=1) + index.get_indexer([1, 0], tolerance=1) with tm.assert_raises_regex(ValueError, 'limit argument'): - idx.get_indexer([1, 0], limit=1) + 
index.get_indexer([1, 0], limit=1) @pytest.mark.parametrize( 'method, tolerance, indexer, expected', @@ -1251,9 +1209,9 @@ def test_get_indexer_invalid(self): ('backfill', 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), ('nearest', 0.2, [0.2, 1.8, 8.5], [0, 2, -1])]) def test_get_indexer_nearest(self, method, tolerance, indexer, expected): - idx = Index(np.arange(10)) + index = Index(np.arange(10)) - actual = idx.get_indexer(indexer, method=method, tolerance=tolerance) + actual = index.get_indexer(indexer, method=method, tolerance=tolerance) tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) @@ -1266,57 +1224,54 @@ def test_get_indexer_nearest(self, method, tolerance, indexer, expected): [-1, 2, 9]]))) def test_get_indexer_nearest_listlike_tolerance(self, tolerance, expected, listtype): - idx = Index(np.arange(10)) + index = Index(np.arange(10)) - actual = idx.get_indexer([0.2, 1.8, 8.5], method='nearest', - tolerance=listtype(tolerance)) + actual = index.get_indexer([0.2, 1.8, 8.5], method='nearest', + tolerance=listtype(tolerance)) tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) def test_get_indexer_nearest_error(self): - idx = Index(np.arange(10)) + index = Index(np.arange(10)) with tm.assert_raises_regex(ValueError, 'limit argument'): - idx.get_indexer([1, 0], method='nearest', limit=1) + index.get_indexer([1, 0], method='nearest', limit=1) with pytest.raises(ValueError, match='tolerance size must match'): - idx.get_indexer([1, 0], method='nearest', - tolerance=[1, 2, 3]) + index.get_indexer([1, 0], method='nearest', + tolerance=[1, 2, 3]) - def test_get_indexer_nearest_decreasing(self): - idx = Index(np.arange(10))[::-1] + @pytest.mark.parametrize("method,expected", [ + ('pad', [8, 7, 0]), ('backfill', [9, 8, 1]), ('nearest', [9, 7, 0])]) + def test_get_indexer_nearest_decreasing(self, method, expected): + index = Index(np.arange(10))[::-1] - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = 
idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], - dtype=np.intp)) + actual = index.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) - for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], - [9, 7, 0]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = index.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) - def test_get_indexer_strings(self): - idx = pd.Index(['b', 'c']) + @pytest.mark.parametrize("method,expected", [ + ('pad', np.array([-1, 0, 1, 1], dtype=np.intp)), + ('backfill', np.array([0, 0, 1, -1], dtype=np.intp))]) + def test_get_indexer_strings(self, method, expected): + index = pd.Index(['b', 'c']) + actual = index.get_indexer(['a', 'b', 'c', 'd'], method=method) - actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad') - expected = np.array([-1, 0, 1, 1], dtype=np.intp) tm.assert_numpy_array_equal(actual, expected) - actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill') - expected = np.array([0, 0, 1, -1], dtype=np.intp) - tm.assert_numpy_array_equal(actual, expected) + def test_get_indexer_strings_raises(self): + index = pd.Index(['b', 'c']) with pytest.raises(TypeError): - idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') + index.get_indexer(['a', 'b', 'c', 'd'], method='nearest') with pytest.raises(TypeError): - idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + index.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) with pytest.raises(TypeError): - idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', - tolerance=[2, 2, 2, 2]) + index.get_indexer(['a', 'b', 'c', 'd'], method='pad', + tolerance=[2, 2, 2, 2]) def test_get_indexer_numeric_index_boolean_target(self): # GH 16877