diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9eae2b7a33923..2279d0464a5c7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -385,7 +385,7 @@ Deprecations - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) - ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - +- importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`) .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/__init__.py b/pandas/__init__.py index 9133e11beaa2b..76542db22a757 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -42,10 +42,10 @@ from pandas.sparse.api import * from pandas.stats.api import * from pandas.tseries.api import * -from pandas.io.api import * from pandas.computation.api import * -from pandas.tools.merge import (merge, concat, ordered_merge, +from pandas.tools.concat import concat +from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab from pandas.tools.plotting import scatter_matrix, plot_params @@ -54,6 +54,8 @@ from pandas.core.reshape import melt from pandas.util.print_versions import show_versions +from pandas.io.api import * + # define the testing framework import pandas.util.testing from pandas.util.nosetester import NoseTester diff --git a/pandas/core/base.py b/pandas/core/base.py index 657da859ddde2..92ec6bb3d73e6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -472,7 +472,7 @@ def _aggregate(self, arg, *args, **kwargs): arg = new_arg - 
from pandas.tools.merge import concat + from pandas.tools.concat import concat def _agg_1dim(name, how, subset=None): """ @@ -579,7 +579,7 @@ def _agg(arg, func): return result, True def _aggregate_multiple_funcs(self, arg, _level): - from pandas.tools.merge import concat + from pandas.tools.concat import concat if self.axis != 0: raise NotImplementedError("axis other than 0 is not supported") diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5980f872f951f..491db2e080953 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1907,7 +1907,7 @@ def describe(self): counts = self.value_counts(dropna=False) freqs = counts / float(counts.sum()) - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat([counts, freqs], axis=1) result.columns = ['counts', 'freqs'] result.index.name = 'categories' diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79bdad82af5a3..aa03bfb9a54b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4402,7 +4402,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.loc[:, self.columns] - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -4532,7 +4532,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): - from pandas.tools.merge import merge, concat + from pandas.tools.merge import merge + from pandas.tools.concat import concat if isinstance(other, Series): if other.name is None: @@ -4636,7 +4637,7 @@ def round(self, decimals=0, *args, **kwargs): Series.round """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat def _dict_round(df, decimals): for col, vals in df.iteritems(): @@ -5306,7 +5307,7 @@ def isin(self, 
values): """ if isinstance(values, dict): from collections import defaultdict - from pandas.tools.merge import concat + from pandas.tools.concat import concat values = defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 99220232114ce..53b6dbe6075cf 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -854,7 +854,7 @@ def _wrap_applied_output(self, *args, **kwargs): raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.tools.merge import concat + from pandas.tools.concat import concat def reset_identity(values): # reset the identities of the components @@ -3507,7 +3507,7 @@ def first_non_None_value(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat(values) if not all_indexed_same: @@ -3540,7 +3540,7 @@ def first_non_None_value(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat(values, keys=key_index, names=key_index.names, axis=self.axis).unstack() @@ -3588,7 +3588,7 @@ def first_non_None_value(values): not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.tools.merge import concat + from pandas.tools.concat import concat applied = [] obj = self._obj_with_exclusions @@ -3980,7 +3980,7 @@ def _iterate_column_groupbys(self): exclusions=self.exclusions) def _apply_to_column_groupbys(self, func): - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), @@ -4061,7 +4061,7 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = 
groupby_series(obj) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 6da10305eb4fc..4a6c6cf291316 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1282,7 +1282,7 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): ------- joined : Panel """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index d6287f17c8387..bd0358abf67d5 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1194,7 +1194,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, -------- Series.str.get_dummies """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat from itertools import cycle if isinstance(data, DataFrame): diff --git a/pandas/core/series.py b/pandas/core/series.py index 43f16f690692a..e1eac8f66017e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1588,7 +1588,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(to_append, (list, tuple)): to_concat = [self] + to_append diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 439b96d650204..1a7a06199ad8a 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -165,7 +165,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self._chk_truncate() def _chk_truncate(self): - from pandas.tools.merge import concat + from pandas.tools.concat import concat max_rows = self.max_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series @@ -406,7 +406,7 @@ def 
_chk_truncate(self): Checks whether the frame should be truncated. If so, slices the frame up. """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat # Column of which first element is used to determine width of a dot col self.tr_size_col = -1 diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 76c228418a616..169a2b1df9b4c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -10,9 +10,7 @@ import numpy as np from distutils.version import StrictVersion -from pandas import compat -from pandas.core.api import DataFrame -from pandas.tools.merge import concat +from pandas import compat, DataFrame, concat from pandas.core.common import PandasError from pandas.compat import lzip, bytes_to_str diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 7b4800115b23b..7661cd19e4d14 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -26,13 +26,12 @@ import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull) + MultiIndex, Int64Index, isnull, concat, + SparseSeries, SparseDataFrame, PeriodIndex, + DatetimeIndex, TimedeltaIndex) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex -from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing from pandas.core.common import _asarray_tuplesafe, PerformanceWarning @@ -42,7 +41,6 @@ _block2d_to_blocknd, _factor_indexer, _block_shape) from pandas.core.index import _ensure_index -from pandas.tools.merge import concat from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, text_type, filter from pandas.core.config import get_option diff --git a/pandas/tests/groupby/test_groupby.py 
b/pandas/tests/groupby/test_groupby.py index 458e869130190..53f85349834ac 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,7 +6,8 @@ from numpy import nan from pandas import (date_range, bdate_range, Timestamp, - isnull, Index, MultiIndex, DataFrame, Series) + isnull, Index, MultiIndex, DataFrame, Series, + concat, Panel) from pandas.core.common import UnsupportedFunctionCall from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -14,8 +15,6 @@ from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict, product as cart_product) from pandas import compat -from pandas.core.panel import Panel -from pandas.tools.merge import concat from collections import defaultdict import pandas.core.common as com import numpy as np diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py new file mode 100644 index 0000000000000..dbbc831b19d1d --- /dev/null +++ b/pandas/tools/concat.py @@ -0,0 +1,615 @@ +""" +concat routines +""" + +import numpy as np +from pandas import compat, DataFrame, Series, Index, MultiIndex +from pandas.core.index import (_get_combined_index, + _ensure_index, _get_consensus_names, + _all_indexes_same) +from pandas.core.categorical import (_factorize_from_iterable, + _factorize_from_iterables) +from pandas.core.internals import concatenate_block_managers +from pandas.core import common as com +from pandas.core.generic import NDFrame +import pandas.types.concat as _concat + +# --------------------------------------------------------------------- +# Concatenate DataFrame objects + + +def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, + keys=None, levels=None, names=None, verify_integrity=False, + copy=True): + """ + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. 
+ + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series, DataFrame, or Panel objects + If a dict is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis(es) + join_axes : list of Index objects + Specific indexes to use for the other n - 1 axes instead of performing + inner/outer set logic + ignore_index : boolean, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys + names : list, default None + Names for the levels in the resulting hierarchical index + verify_integrity : boolean, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation + copy : boolean, default True + If False, do not copy data unnecessarily + + Returns + ------- + concatenated : type of objects + + Notes + ----- + The keys, levels, and names arguments are all optional. 
+ + A walkthrough of how this method fits in with other tools for combining + panda objects can be found `here + `__. + + See Also + -------- + Series.append + DataFrame.append + DataFrame.join + DataFrame.merge + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2',]) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3]) + animal letter number + 0 NaN a 1 + 1 NaN b 2 + 0 cat c 3 + 1 dog d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. 
+ + >>> pd.concat([df1, df3], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects horizontally along the x axis by + passing in ``axis=1``. + + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george + + Prevent the result from including duplicate index values with the + ``verify_integrity`` option. + + >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 + 0 + a 1 + >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 + 0 + a 2 + >>> pd.concat([df5, df6], verify_integrity=True) + ValueError: Indexes have overlapping values: ['a'] + """ + op = _Concatenator(objs, axis=axis, join_axes=join_axes, + ignore_index=ignore_index, join=join, + keys=keys, levels=levels, names=names, + verify_integrity=verify_integrity, + copy=copy) + return op.get_result() + + +class _Concatenator(object): + """ + Orchestrates a concatenation operation for BlockManagers + """ + + def __init__(self, objs, axis=0, join='outer', join_axes=None, + keys=None, levels=None, names=None, + ignore_index=False, verify_integrity=False, copy=True): + if isinstance(objs, (NDFrame, compat.string_types)): + raise TypeError('first argument must be an iterable of pandas ' + 'objects, you passed an object of type ' + '"{0}"'.format(type(objs).__name__)) + + if join == 'outer': + self.intersect = False + elif join == 'inner': + self.intersect = True + else: # pragma: no cover + raise ValueError('Only can inner (intersect) or outer (union) ' + 'join the other axis') + + if isinstance(objs, dict): + if keys is None: + keys = sorted(objs) + objs = [objs[k] for k in keys] + else: + objs = list(objs) + + if len(objs) == 0: + raise ValueError('No objects to concatenate') + + if keys is None: + objs = [obj for obj in objs if obj is not None] + else: + # #1649 + clean_keys = [] + clean_objs = [] + for k, v in zip(keys, objs): + if v 
is None: + continue + clean_keys.append(k) + clean_objs.append(v) + objs = clean_objs + name = getattr(keys, 'name', None) + keys = Index(clean_keys, name=name) + + if len(objs) == 0: + raise ValueError('All objects passed were None') + + # consolidate data & figure out what our result ndim is going to be + ndims = set() + for obj in objs: + if not isinstance(obj, NDFrame): + raise TypeError("cannot concatenate a non-NDFrame object") + + # consolidate + obj.consolidate(inplace=True) + ndims.add(obj.ndim) + + # get the sample + # want the higest ndim that we have, and must be non-empty + # unless all objs are empty + sample = None + if len(ndims) > 1: + max_ndim = max(ndims) + for obj in objs: + if obj.ndim == max_ndim and np.sum(obj.shape): + sample = obj + break + + else: + # filter out the empties if we have not multi-index possibiltes + # note to keep empty Series as it affect to result columns / name + non_empties = [obj for obj in objs + if sum(obj.shape) > 0 or isinstance(obj, Series)] + + if (len(non_empties) and (keys is None and names is None and + levels is None and join_axes is None)): + objs = non_empties + sample = objs[0] + + if sample is None: + sample = objs[0] + self.objs = objs + + # Standardize axis parameter to int + if isinstance(sample, Series): + axis = DataFrame()._get_axis_number(axis) + else: + axis = sample._get_axis_number(axis) + + # Need to flip BlockManager axis in the DataFrame special case + self._is_frame = isinstance(sample, DataFrame) + if self._is_frame: + axis = 1 if axis == 0 else 0 + + self._is_series = isinstance(sample, Series) + if not 0 <= axis <= sample.ndim: + raise AssertionError("axis must be between 0 and {0}, " + "input was {1}".format(sample.ndim, axis)) + + # if we have mixed ndims, then convert to highest ndim + # creating column numbers as needed + if len(ndims) > 1: + current_column = 0 + max_ndim = sample.ndim + self.objs, objs = [], self.objs + for obj in objs: + + ndim = obj.ndim + if ndim == max_ndim: + 
pass + + elif ndim != max_ndim - 1: + raise ValueError("cannot concatenate unaligned mixed " + "dimensional NDFrame objects") + + else: + name = getattr(obj, 'name', None) + if ignore_index or name is None: + name = current_column + current_column += 1 + + # doing a row-wise concatenation so need everything + # to line up + if self._is_frame and axis == 1: + name = 0 + obj = sample._constructor({name: obj}) + + self.objs.append(obj) + + # note: this is the BlockManager axis (since DataFrame is transposed) + self.axis = axis + self.join_axes = join_axes + self.keys = keys + self.names = names or getattr(keys, 'names', None) + self.levels = levels + + self.ignore_index = ignore_index + self.verify_integrity = verify_integrity + self.copy = copy + + self.new_axes = self._get_new_axes() + + def get_result(self): + + # series only + if self._is_series: + + # stack blocks + if self.axis == 0: + # concat Series with length to keep dtype as much + non_empties = [x for x in self.objs if len(x) > 0] + if len(non_empties) > 0: + values = [x._values for x in non_empties] + else: + values = [x._values for x in self.objs] + new_data = _concat._concat_compat(values) + + name = com._consensus_name_attr(self.objs) + cons = _concat._get_series_result_type(new_data) + + return (cons(new_data, index=self.new_axes[0], + name=name, dtype=new_data.dtype) + .__finalize__(self, method='concat')) + + # combine as columns in a frame + else: + data = dict(zip(range(len(self.objs)), self.objs)) + cons = _concat._get_series_result_type(data) + + index, columns = self.new_axes + df = cons(data, index=index) + df.columns = columns + return df.__finalize__(self, method='concat') + + # combine block managers + else: + mgrs_indexers = [] + for obj in self.objs: + mgr = obj._data + indexers = {} + for ax, new_labels in enumerate(self.new_axes): + if ax == self.axis: + # Suppress reindexing on concat axis + continue + + obj_labels = mgr.axes[ax] + if not new_labels.equals(obj_labels): + indexers[ax] = 
obj_labels.reindex(new_labels)[1] + + mgrs_indexers.append((obj._data, indexers)) + + new_data = concatenate_block_managers( + mgrs_indexers, self.new_axes, concat_axis=self.axis, + copy=self.copy) + if not self.copy: + new_data._consolidate_inplace() + + cons = _concat._get_frame_result_type(new_data, self.objs) + return (cons._from_axes(new_data, self.new_axes) + .__finalize__(self, method='concat')) + + def _get_result_dim(self): + if self._is_series and self.axis == 1: + return 2 + else: + return self.objs[0].ndim + + def _get_new_axes(self): + ndim = self._get_result_dim() + new_axes = [None] * ndim + + if self.join_axes is None: + for i in range(ndim): + if i == self.axis: + continue + new_axes[i] = self._get_comb_axis(i) + else: + if len(self.join_axes) != ndim - 1: + raise AssertionError("length of join_axes must not be " + "equal to {0}".format(ndim - 1)) + + # ufff... + indices = compat.lrange(ndim) + indices.remove(self.axis) + + for i, ax in zip(indices, self.join_axes): + new_axes[i] = ax + + new_axes[self.axis] = self._get_concat_axis() + return new_axes + + def _get_comb_axis(self, i): + if self._is_series: + all_indexes = [x.index for x in self.objs] + else: + try: + all_indexes = [x._data.axes[i] for x in self.objs] + except IndexError: + types = [type(x).__name__ for x in self.objs] + raise TypeError("Cannot concatenate list of %s" % types) + + return _get_combined_index(all_indexes, intersect=self.intersect) + + def _get_concat_axis(self): + """ + Return index to be used along concatenation axis. 
+ """ + if self._is_series: + if self.axis == 0: + indexes = [x.index for x in self.objs] + elif self.ignore_index: + idx = com._default_index(len(self.objs)) + return idx + elif self.keys is None: + names = [None] * len(self.objs) + num = 0 + has_names = False + for i, x in enumerate(self.objs): + if not isinstance(x, Series): + raise TypeError("Cannot concatenate type 'Series' " + "with object of type " + "%r" % type(x).__name__) + if x.name is not None: + names[i] = x.name + has_names = True + else: + names[i] = num + num += 1 + if has_names: + return Index(names) + else: + return com._default_index(len(self.objs)) + else: + return _ensure_index(self.keys) + else: + indexes = [x._data.axes[self.axis] for x in self.objs] + + if self.ignore_index: + idx = com._default_index(sum(len(i) for i in indexes)) + return idx + + if self.keys is None: + concat_axis = _concat_indexes(indexes) + else: + concat_axis = _make_concat_multiindex(indexes, self.keys, + self.levels, self.names) + + self._maybe_check_integrity(concat_axis) + + return concat_axis + + def _maybe_check_integrity(self, concat_index): + if self.verify_integrity: + if not concat_index.is_unique: + overlap = concat_index.get_duplicates() + raise ValueError('Indexes have overlapping values: %s' + % str(overlap)) + + +def _concat_indexes(indexes): + return indexes[0].append(indexes[1:]) + + +def _make_concat_multiindex(indexes, keys, levels=None, names=None): + + if ((levels is None and isinstance(keys[0], tuple)) or + (levels is not None and len(levels) > 1)): + zipped = compat.lzip(*keys) + if names is None: + names = [None] * len(zipped) + + if levels is None: + _, levels = _factorize_from_iterables(zipped) + else: + levels = [_ensure_index(x) for x in levels] + else: + zipped = [keys] + if names is None: + names = [None] + + if levels is None: + levels = [_ensure_index(keys)] + else: + levels = [_ensure_index(x) for x in levels] + + if not _all_indexes_same(indexes): + label_list = [] + + # things are 
potentially different sizes, so compute the exact labels + # for each level and pass those to MultiIndex.from_arrays + + for hlevel, level in zip(zipped, levels): + to_concat = [] + for key, index in zip(hlevel, indexes): + try: + i = level.get_loc(key) + except KeyError: + raise ValueError('Key %s not in level %s' + % (str(key), str(level))) + + to_concat.append(np.repeat(i, len(index))) + label_list.append(np.concatenate(to_concat)) + + concat_index = _concat_indexes(indexes) + + # these go at the end + if isinstance(concat_index, MultiIndex): + levels.extend(concat_index.levels) + label_list.extend(concat_index.labels) + else: + codes, categories = _factorize_from_iterable(concat_index) + levels.append(categories) + label_list.append(codes) + + if len(names) == len(levels): + names = list(names) + else: + # make sure that all of the passed indices have the same nlevels + if not len(set([idx.nlevels for idx in indexes])) == 1: + raise AssertionError("Cannot concat indices that do" + " not have the same number of levels") + + # also copies + names = names + _get_consensus_names(indexes) + + return MultiIndex(levels=levels, labels=label_list, names=names, + verify_integrity=False) + + new_index = indexes[0] + n = len(new_index) + kpieces = len(indexes) + + # also copies + new_names = list(names) + new_levels = list(levels) + + # construct labels + new_labels = [] + + # do something a bit more speedy + + for hlevel, level in zip(zipped, levels): + hlevel = _ensure_index(hlevel) + mapped = level.get_indexer(hlevel) + + mask = mapped == -1 + if mask.any(): + raise ValueError('Values not found in passed level: %s' + % str(hlevel[mask])) + + new_labels.append(np.repeat(mapped, n)) + + if isinstance(new_index, MultiIndex): + new_levels.extend(new_index.levels) + new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) + else: + new_levels.append(new_index) + new_labels.append(np.tile(np.arange(n), kpieces)) + + if len(new_names) < len(new_levels): + 
new_names.extend(new_index.names) + + return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, + verify_integrity=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 3fbd83a6f3245..d938c2eeacbef 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -4,19 +4,16 @@ import copy import warnings - import string import numpy as np -from pandas.compat import range, lrange, lzip, zip, map, filter +from pandas.compat import range, lzip, zip, map, filter import pandas.compat as compat -from pandas import (Categorical, DataFrame, Series, +import pandas as pd +from pandas import (Categorical, Series, DataFrame, Index, MultiIndex, Timedelta) -from pandas.core.categorical import (_factorize_from_iterable, - _factorize_from_iterables) from pandas.core.frame import _merge_doc -from pandas.types.generic import ABCSeries from pandas.types.common import (is_datetime64tz_dtype, is_datetime64_dtype, needs_i8_conversion, @@ -33,23 +30,31 @@ _ensure_object, _get_dtype) from pandas.types.missing import na_value_for_dtype - -from pandas.core.generic import NDFrame -from pandas.core.index import (_get_combined_index, - _ensure_index, _get_consensus_names, - _all_indexes_same) from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution import pandas.core.algorithms as algos import pandas.core.common as com -import pandas.types.concat as _concat import pandas._join as _join import pandas.hashtable as _hash +# back-compat of pseudo-public API +def concat_wrap(): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.tools.merge.concat is deprecated. 
" + "import from the public API: " + "pandas.concat instead", + FutureWarning, stacklevel=3) + return pd.concat(*args, **kwargs) + return wrapper + +concat = concat_wrap() + + @Substitution('\nleft : DataFrame') @Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, @@ -139,6 +144,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, # preserve the original order # if we have a missing piece this can be reset + from pandas.tools.concat import concat result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby @@ -793,9 +799,9 @@ def _get_merge_keys(self): left, right = self.left, self.right is_lkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(left) + x, (np.ndarray, Series)) and len(x) == len(left) is_rkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(right) + x, (np.ndarray, Series)) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a @@ -1419,606 +1425,6 @@ def _get_join_keys(llab, rlab, shape, sort): return _get_join_keys(llab, rlab, shape, sort) -# --------------------------------------------------------------------- -# Concatenate DataFrame objects - - -def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, - copy=True): - """ - Concatenate pandas objects along a particular axis with optional set logic - along the other axes. - - Can also add a layer of hierarchical indexing on the concatenation axis, - which may be useful if the labels are the same (or overlapping) on - the passed axis number. 
- - Parameters - ---------- - objs : a sequence or mapping of Series, DataFrame, or Panel objects - If a dict is passed, the sorted keys will be used as the `keys` - argument, unless it is passed, in which case the values will be - selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised - axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along - join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis(es) - join_axes : list of Index objects - Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic - ignore_index : boolean, default False - If True, do not use the index values along the concatenation axis. The - resulting axis will be labeled 0, ..., n - 1. This is useful if you are - concatenating objects where the concatenation axis does not have - meaningful indexing information. Note the index values on the other - axes are still respected in the join. - keys : sequence, default None - If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level - levels : list of sequences, default None - Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys - names : list, default None - Names for the levels in the resulting hierarchical index - verify_integrity : boolean, default False - Check whether the new concatenated axis contains duplicates. This can - be very expensive relative to the actual data concatenation - copy : boolean, default True - If False, do not copy data unnecessarily - - Returns - ------- - concatenated : type of objects - - Notes - ----- - The keys, levels, and names arguments are all optional. - - A walkthrough of how this method fits in with other tools for combining - panda objects can be found `here - `__. 
- - See Also - -------- - Series.append - DataFrame.append - DataFrame.join - DataFrame.merge - - Examples - -------- - Combine two ``Series``. - - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) - >>> pd.concat([s1, s2]) - 0 a - 1 b - 0 c - 1 d - dtype: object - - Clear the existing index and reset it in the result - by setting the ``ignore_index`` option to ``True``. - - >>> pd.concat([s1, s2], ignore_index=True) - 0 a - 1 b - 2 c - 3 d - dtype: object - - Add a hierarchical index at the outermost level of - the data with the ``keys`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2',]) - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Label the index keys you create with the ``names`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2'], - ... names=['Series name', 'Row ID']) - Series name Row ID - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Combine two ``DataFrame`` objects with identical columns. - - >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], - ... columns=['letter', 'number']) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], - ... columns=['letter', 'number']) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2]) - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return everything. Columns outside the intersection will - be filled with ``NaN`` values. - - >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal']) - >>> df3 - letter number animal - 0 c 3 cat - 1 d 4 dog - >>> pd.concat([df1, df3]) - animal letter number - 0 NaN a 1 - 1 NaN b 2 - 0 cat c 3 - 1 dog d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return only those that are shared by passing ``inner`` to - the ``join`` keyword argument. 
- - >>> pd.concat([df1, df3], join="inner") - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects horizontally along the x axis by - passing in ``axis=1``. - - >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], - ... columns=['animal', 'name']) - >>> pd.concat([df1, df4], axis=1) - letter number animal name - 0 a 1 bird polly - 1 b 2 monkey george - - Prevent the result from including duplicate index values with the - ``verify_integrity`` option. - - >>> df5 = pd.DataFrame([1], index=['a']) - >>> df5 - 0 - a 1 - >>> df6 = pd.DataFrame([2], index=['a']) - >>> df6 - 0 - a 2 - >>> pd.concat([df5, df6], verify_integrity=True) - ValueError: Indexes have overlapping values: ['a'] - """ - op = _Concatenator(objs, axis=axis, join_axes=join_axes, - ignore_index=ignore_index, join=join, - keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, - copy=copy) - return op.get_result() - - -class _Concatenator(object): - """ - Orchestrates a concatenation operation for BlockManagers - """ - - def __init__(self, objs, axis=0, join='outer', join_axes=None, - keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True): - if isinstance(objs, (NDFrame, compat.string_types)): - raise TypeError('first argument must be an iterable of pandas ' - 'objects, you passed an object of type ' - '"{0}"'.format(type(objs).__name__)) - - if join == 'outer': - self.intersect = False - elif join == 'inner': - self.intersect = True - else: # pragma: no cover - raise ValueError('Only can inner (intersect) or outer (union) ' - 'join the other axis') - - if isinstance(objs, dict): - if keys is None: - keys = sorted(objs) - objs = [objs[k] for k in keys] - else: - objs = list(objs) - - if len(objs) == 0: - raise ValueError('No objects to concatenate') - - if keys is None: - objs = [obj for obj in objs if obj is not None] - else: - # #1649 - clean_keys = [] - clean_objs = [] - for k, v in zip(keys, objs): - if v 
is None: - continue - clean_keys.append(k) - clean_objs.append(v) - objs = clean_objs - name = getattr(keys, 'name', None) - keys = Index(clean_keys, name=name) - - if len(objs) == 0: - raise ValueError('All objects passed were None') - - # consolidate data & figure out what our result ndim is going to be - ndims = set() - for obj in objs: - if not isinstance(obj, NDFrame): - raise TypeError("cannot concatenate a non-NDFrame object") - - # consolidate - obj.consolidate(inplace=True) - ndims.add(obj.ndim) - - # get the sample - # want the higest ndim that we have, and must be non-empty - # unless all objs are empty - sample = None - if len(ndims) > 1: - max_ndim = max(ndims) - for obj in objs: - if obj.ndim == max_ndim and np.sum(obj.shape): - sample = obj - break - - else: - # filter out the empties if we have not multi-index possibiltes - # note to keep empty Series as it affect to result columns / name - non_empties = [obj for obj in objs - if sum(obj.shape) > 0 or isinstance(obj, Series)] - - if (len(non_empties) and (keys is None and names is None and - levels is None and join_axes is None)): - objs = non_empties - sample = objs[0] - - if sample is None: - sample = objs[0] - self.objs = objs - - # Standardize axis parameter to int - if isinstance(sample, Series): - axis = DataFrame()._get_axis_number(axis) - else: - axis = sample._get_axis_number(axis) - - # Need to flip BlockManager axis in the DataFrame special case - self._is_frame = isinstance(sample, DataFrame) - if self._is_frame: - axis = 1 if axis == 0 else 0 - - self._is_series = isinstance(sample, ABCSeries) - if not 0 <= axis <= sample.ndim: - raise AssertionError("axis must be between 0 and {0}, " - "input was {1}".format(sample.ndim, axis)) - - # if we have mixed ndims, then convert to highest ndim - # creating column numbers as needed - if len(ndims) > 1: - current_column = 0 - max_ndim = sample.ndim - self.objs, objs = [], self.objs - for obj in objs: - - ndim = obj.ndim - if ndim == max_ndim: - 
pass - - elif ndim != max_ndim - 1: - raise ValueError("cannot concatenate unaligned mixed " - "dimensional NDFrame objects") - - else: - name = getattr(obj, 'name', None) - if ignore_index or name is None: - name = current_column - current_column += 1 - - # doing a row-wise concatenation so need everything - # to line up - if self._is_frame and axis == 1: - name = 0 - obj = sample._constructor({name: obj}) - - self.objs.append(obj) - - # note: this is the BlockManager axis (since DataFrame is transposed) - self.axis = axis - self.join_axes = join_axes - self.keys = keys - self.names = names or getattr(keys, 'names', None) - self.levels = levels - - self.ignore_index = ignore_index - self.verify_integrity = verify_integrity - self.copy = copy - - self.new_axes = self._get_new_axes() - - def get_result(self): - - # series only - if self._is_series: - - # stack blocks - if self.axis == 0: - # concat Series with length to keep dtype as much - non_empties = [x for x in self.objs if len(x) > 0] - if len(non_empties) > 0: - values = [x._values for x in non_empties] - else: - values = [x._values for x in self.objs] - new_data = _concat._concat_compat(values) - - name = com._consensus_name_attr(self.objs) - cons = _concat._get_series_result_type(new_data) - - return (cons(new_data, index=self.new_axes[0], - name=name, dtype=new_data.dtype) - .__finalize__(self, method='concat')) - - # combine as columns in a frame - else: - data = dict(zip(range(len(self.objs)), self.objs)) - cons = _concat._get_series_result_type(data) - - index, columns = self.new_axes - df = cons(data, index=index) - df.columns = columns - return df.__finalize__(self, method='concat') - - # combine block managers - else: - mgrs_indexers = [] - for obj in self.objs: - mgr = obj._data - indexers = {} - for ax, new_labels in enumerate(self.new_axes): - if ax == self.axis: - # Suppress reindexing on concat axis - continue - - obj_labels = mgr.axes[ax] - if not new_labels.equals(obj_labels): - indexers[ax] = 
obj_labels.reindex(new_labels)[1] - - mgrs_indexers.append((obj._data, indexers)) - - new_data = concatenate_block_managers( - mgrs_indexers, self.new_axes, concat_axis=self.axis, - copy=self.copy) - if not self.copy: - new_data._consolidate_inplace() - - cons = _concat._get_frame_result_type(new_data, self.objs) - return (cons._from_axes(new_data, self.new_axes) - .__finalize__(self, method='concat')) - - def _get_result_dim(self): - if self._is_series and self.axis == 1: - return 2 - else: - return self.objs[0].ndim - - def _get_new_axes(self): - ndim = self._get_result_dim() - new_axes = [None] * ndim - - if self.join_axes is None: - for i in range(ndim): - if i == self.axis: - continue - new_axes[i] = self._get_comb_axis(i) - else: - if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must not be " - "equal to {0}".format(ndim - 1)) - - # ufff... - indices = lrange(ndim) - indices.remove(self.axis) - - for i, ax in zip(indices, self.join_axes): - new_axes[i] = ax - - new_axes[self.axis] = self._get_concat_axis() - return new_axes - - def _get_comb_axis(self, i): - if self._is_series: - all_indexes = [x.index for x in self.objs] - else: - try: - all_indexes = [x._data.axes[i] for x in self.objs] - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) - - return _get_combined_index(all_indexes, intersect=self.intersect) - - def _get_concat_axis(self): - """ - Return index to be used along concatenation axis. 
- """ - if self._is_series: - if self.axis == 0: - indexes = [x.index for x in self.objs] - elif self.ignore_index: - idx = com._default_index(len(self.objs)) - return idx - elif self.keys is None: - names = [None] * len(self.objs) - num = 0 - has_names = False - for i, x in enumerate(self.objs): - if not isinstance(x, Series): - raise TypeError("Cannot concatenate type 'Series' " - "with object of type " - "%r" % type(x).__name__) - if x.name is not None: - names[i] = x.name - has_names = True - else: - names[i] = num - num += 1 - if has_names: - return Index(names) - else: - return com._default_index(len(self.objs)) - else: - return _ensure_index(self.keys) - else: - indexes = [x._data.axes[self.axis] for x in self.objs] - - if self.ignore_index: - idx = com._default_index(sum(len(i) for i in indexes)) - return idx - - if self.keys is None: - concat_axis = _concat_indexes(indexes) - else: - concat_axis = _make_concat_multiindex(indexes, self.keys, - self.levels, self.names) - - self._maybe_check_integrity(concat_axis) - - return concat_axis - - def _maybe_check_integrity(self, concat_index): - if self.verify_integrity: - if not concat_index.is_unique: - overlap = concat_index.get_duplicates() - raise ValueError('Indexes have overlapping values: %s' - % str(overlap)) - - -def _concat_indexes(indexes): - return indexes[0].append(indexes[1:]) - - -def _make_concat_multiindex(indexes, keys, levels=None, names=None): - - if ((levels is None and isinstance(keys[0], tuple)) or - (levels is not None and len(levels) > 1)): - zipped = lzip(*keys) - if names is None: - names = [None] * len(zipped) - - if levels is None: - _, levels = _factorize_from_iterables(zipped) - else: - levels = [_ensure_index(x) for x in levels] - else: - zipped = [keys] - if names is None: - names = [None] - - if levels is None: - levels = [_ensure_index(keys)] - else: - levels = [_ensure_index(x) for x in levels] - - if not _all_indexes_same(indexes): - label_list = [] - - # things are potentially 
different sizes, so compute the exact labels - # for each level and pass those to MultiIndex.from_arrays - - for hlevel, level in zip(zipped, levels): - to_concat = [] - for key, index in zip(hlevel, indexes): - try: - i = level.get_loc(key) - except KeyError: - raise ValueError('Key %s not in level %s' - % (str(key), str(level))) - - to_concat.append(np.repeat(i, len(index))) - label_list.append(np.concatenate(to_concat)) - - concat_index = _concat_indexes(indexes) - - # these go at the end - if isinstance(concat_index, MultiIndex): - levels.extend(concat_index.levels) - label_list.extend(concat_index.labels) - else: - codes, categories = _factorize_from_iterable(concat_index) - levels.append(categories) - label_list.append(codes) - - if len(names) == len(levels): - names = list(names) - else: - # make sure that all of the passed indices have the same nlevels - if not len(set([idx.nlevels for idx in indexes])) == 1: - raise AssertionError("Cannot concat indices that do" - " not have the same number of levels") - - # also copies - names = names + _get_consensus_names(indexes) - - return MultiIndex(levels=levels, labels=label_list, names=names, - verify_integrity=False) - - new_index = indexes[0] - n = len(new_index) - kpieces = len(indexes) - - # also copies - new_names = list(names) - new_levels = list(levels) - - # construct labels - new_labels = [] - - # do something a bit more speedy - - for hlevel, level in zip(zipped, levels): - hlevel = _ensure_index(hlevel) - mapped = level.get_indexer(hlevel) - - mask = mapped == -1 - if mask.any(): - raise ValueError('Values not found in passed level: %s' - % str(hlevel[mask])) - - new_labels.append(np.repeat(mapped, n)) - - if isinstance(new_index, MultiIndex): - new_levels.extend(new_index.levels) - new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) - else: - new_levels.append(new_index) - new_labels.append(np.tile(np.arange(n), kpieces)) - - if len(new_names) < len(new_levels): - 
new_names.extend(new_index.names) - - return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, - verify_integrity=False) - def _should_fill(lname, rname): if (not isinstance(lname, compat.string_types) or diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 01eefe5f07173..41fc705691a96 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -2,10 +2,8 @@ from pandas.types.common import is_list_like, is_scalar -from pandas import Series, DataFrame -from pandas.core.index import MultiIndex, Index +from pandas import Series, DataFrame, MultiIndex, Index, concat from pandas.core.groupby import Grouper -from pandas.tools.merge import concat from pandas.tools.util import cartesian_product from pandas.compat import range, lrange, zip from pandas import compat diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index ee70515850b25..0b1ced97d2b81 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -3135,7 +3135,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat keys, frames = zip(*grouped) if grouped.axis == 0: df = concat(frames, keys=keys, axis=1) diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py index ff0a494bd7d02..fe5821a637205 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tools/tests/test_join.py @@ -6,9 +6,8 @@ import pandas as pd from pandas.compat import lrange import pandas.compat as compat -from pandas.tools.merge import merge, concat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, MultiIndex, Series, merge, concat import pandas._join as _join import pandas.util.testing as tm diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 
a348a901442c9..d66cd793ec0be 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -8,7 +8,8 @@ import pandas as pd from pandas.compat import lrange, lzip -from pandas.tools.merge import merge, concat, MergeError +from pandas.tools.concat import concat +from pandas.tools.merge import merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, slow) diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tools/tests/test_merge_ordered.py index e08cc98e50794..e4a41ea9a28eb 100644 --- a/pandas/tools/tests/test_merge_ordered.py +++ b/pandas/tools/tests/test_merge_ordered.py @@ -40,10 +40,8 @@ def test_ffill(self): def test_multigroup(self): left = pd.concat([self.left, self.left], ignore_index=True) - # right = concat([self.right, self.right], ignore_index=True) left['group'] = ['a'] * 3 + ['b'] * 3 - # right['group'] = ['a'] * 4 + ['b'] * 4 result = merge_ordered(left, self.right, on='key', left_by='group', fill_method='ffill') diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 7f2bb7e724362..f5d91d0088306 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -3,8 +3,8 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, Index, MultiIndex, Grouper, date_range -from pandas.tools.merge import concat +from pandas import (DataFrame, Series, Index, MultiIndex, + Grouper, date_range, concat) from pandas.tools.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm