Deprecate cols in to_csv, to_excel, drop_duplicates, and duplicated. …

…Use decorator. Update docs and unit tests. [fix pandas-dev#6645, fix#6680]
jsexauer · Mar 24, 2014 · a36109d · a36109d
1 parent 7ffa655
commit a36109d
Show file tree

Hide file tree

Showing 9 changed files with 201 additions and 104 deletions.
diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst
@@ -171,7 +171,7 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this:
       'player': random.sample(list(string.ascii_lowercase),25),
       'batting avg': np.random.uniform(.200, .400, 25)
       })
-   baseball.pivot_table(values='batting avg', cols='team', aggfunc=np.max)
+   baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max)
 
 For more details and examples see :ref:`the reshaping documentation
 <reshaping.pivot>`.
@@ -402,8 +402,8 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`:
         'week': [1,2]*6
    })
    mdf = pd.melt(df, id_vars=['month', 'week'])
-   pd.pivot_table(mdf, values='value', rows=['variable','week'],
-                    cols=['month'], aggfunc=np.mean)
+   pd.pivot_table(mdf, values='value', index=['variable','week'],
+                    columns=['month'], aggfunc=np.mean)
 
 Similarly for ``dcast`` which uses a data.frame called ``df`` in R to
 aggregate information based on ``Animal`` and ``FeedType``:
@@ -433,7 +433,7 @@ using :meth:`~pandas.pivot_table`:
        'Amount': [10, 7, 4, 2, 5, 6, 2],
    })
 
-   df.pivot_table(values='Amount', rows='Animal', cols='FeedType', aggfunc='sum')
+   df.pivot_table(values='Amount', index='Animal', columns='FeedType', aggfunc='sum')
 
 The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -129,11 +129,6 @@ API Changes
     ``DataFrame.stack`` operations where the name of the column index is used as
     the name of the inserted column containing the pivoted data.
 
-- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions
-  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A
-  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
-  will not be supported in a future release (:issue:`5505`)
-
 - Allow specification of a more complex groupby, via ``pd.Grouper`` (:issue:`3794`)
 
 - A tuple passed to ``DataFame.sort_index`` will be interpreted as the levels of
@@ -149,6 +144,21 @@ API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions
+  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A
+  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
+  will not be supported in a future release (:issue:`5505`)
+
+- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods
+  now take argument ``subset`` instead of ``cols`` to better align with
+  :meth:`DataFrame.dropna`.  A ``FutureWarning`` is raised to alert that the old
+  ``cols`` argument will not be supported in a future release (:issue:`6680`)
+
+- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` methods
+  now take argument ``columns`` instead of ``cols``.  A
+  ``FutureWarning`` is raised to alert that the old ``cols`` argument
+  will not be supported in a future release (:issue:`6645`)
+
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
@@ -283,9 +283,9 @@ We can produce pivot tables from this data very easily:
 
 .. ipython:: python
 
-   pivot_table(df, values='D', rows=['A', 'B'], cols=['C'])
-   pivot_table(df, values='D', rows=['B'], cols=['A', 'C'], aggfunc=np.sum)
-   pivot_table(df, values=['D','E'], rows=['B'], cols=['A', 'C'], aggfunc=np.sum)
+   pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
+   pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum)
+   pivot_table(df, values=['D','E'], index=['B'], columns=['A', 'C'], aggfunc=np.sum)
 
 The result object is a DataFrame having potentially hierarchical indexes on the
 rows and columns. If the ``values`` column name is not given, the pivot table
@@ -294,14 +294,14 @@ hierarchy in the columns:
 
 .. ipython:: python
 
-   pivot_table(df, rows=['A', 'B'], cols=['C'])
+   pivot_table(df, index=['A', 'B'], columns=['C'])
 
 You can render a nice output of the table omitting the missing values by
 calling ``to_string`` if you wish:
 
 .. ipython:: python
 
-   table = pivot_table(df, rows=['A', 'B'], cols=['C'])
+   table = pivot_table(df, index=['A', 'B'], columns=['C'])
    print(table.to_string(na_rep=''))
 
 Note that ``pivot_table`` is also available as an instance method on DataFrame.
@@ -315,8 +315,8 @@ unless an array of values and an aggregation function are passed.
 
 It takes a number of arguments
 
-- ``rows``: array-like, values to group by in the rows
-- ``cols``: array-like, values to group by in the columns
+- ``index``: array-like, values to group by in the rows
+- ``columns``: array-like, values to group by in the columns
 - ``values``: array-like, optional, array of values to aggregate according to
   the factors
 - ``aggfunc``: function, optional, If no values array is passed, computes a
@@ -350,7 +350,7 @@ rows and columns:
 
 .. ipython:: python
 
-   df.pivot_table(rows=['A', 'B'], cols='C', margins=True, aggfunc=np.std)
+   df.pivot_table(index=['A', 'B'], columns='C', margins=True, aggfunc=np.std)
 
 .. _reshaping.tile:
 

diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -173,11 +173,6 @@ These are out-of-bounds selections
     # New output, 4-level MultiIndex
     df_multi.set_index([df_multi.index, df_multi.index])
 
-- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions
-  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A
-  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
-  will not be supported in a future release (:issue:`5505`)
-
 - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
   - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten.
   - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`)
@@ -313,8 +308,20 @@ Therse are prior version deprecations that are taking effect as of 0.14.0.
 
 Deprecations
 ~~~~~~~~~~~~
+- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions
+  now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``.  A
+  ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
+  will not be supported in a future release (:issue:`5505`)
+
+- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods
+  now take argument ``subset`` instead of ``cols`` to better align with
+  :meth:`DataFrame.dropna`.  A ``FutureWarning`` is raised to alert that the old
+  ``cols`` argument will not be supported in a future release (:issue:`6680`)
 
-There are no deprecations of prior behavior in 0.14.0
+- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` methods
+  now take argument ``columns`` instead of ``cols``.  A
+  ``FutureWarning`` is raised to alert that the old ``cols`` argument
+  will not be supported in a future release (:issue:`6645`)
 
 Enhancements
 ~~~~~~~~~~~~

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -41,7 +41,8 @@
 from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u,
                           OrderedDict, raise_with_traceback)
 from pandas import compat
-from pandas.util.decorators import deprecate, Appender, Substitution
+from pandas.util.decorators import deprecate, Appender, Substitution, \
+    deprecate_kwarg
 
 from pandas.tseries.period import PeriodIndex
 from pandas.tseries.index import DatetimeIndex
@@ -1067,8 +1068,9 @@ def to_panel(self):
 
     to_wide = deprecate('to_wide', to_panel)
 
+    @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
     def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
-               cols=None, header=True, index=True, index_label=None,
+               columns=None, header=True, index=True, index_label=None,
                mode='w', nanRep=None, encoding=None, quoting=None,
                quotechar='"', line_terminator='\n', chunksize=None,
                tupleize_cols=False, date_format=None, doublequote=True,
@@ -1086,7 +1088,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
             Missing data representation
         float_format : string, default None
             Format string for floating point numbers
-        cols : sequence, optional
+        columns : sequence, optional
             Columns to write
         header : boolean or list of string, default True
             Write out column names. If a list of string is given it is assumed
@@ -1124,6 +1126,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
             or new (expanded format) if False)
         date_format : string, default None
             Format string for datetime objects
+        cols : kwarg only alias of columns [deprecated]
         """
         if nanRep is not None:  # pragma: no cover
             warnings.warn("nanRep is deprecated, use na_rep",
@@ -1134,7 +1137,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
                                      line_terminator=line_terminator,
                                      sep=sep, encoding=encoding,
                                      quoting=quoting, na_rep=na_rep,
-                                     float_format=float_format, cols=cols,
+                                     float_format=float_format, cols=columns,
                                      header=header, index=index,
                                      index_label=index_label, mode=mode,
                                      chunksize=chunksize, quotechar=quotechar,
@@ -1148,8 +1151,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         if path_or_buf is None:
             return formatter.path_or_buf.getvalue()
 
+    @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
     def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
-                 float_format=None, cols=None, header=True, index=True,
+                 float_format=None, columns=None, header=True, index=True,
                  index_label=None, startrow=0, startcol=0, engine=None,
                  merge_cells=True, encoding=None):
         """
@@ -1189,6 +1193,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
         encoding: string, default None
             encoding of the resulting excel file. Only necessary for xlwt,
             other writers support unicode natively.
+        cols : kwarg only alias of columns [deprecated]
 
         Notes
         -----
@@ -1202,6 +1207,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
         >>> writer.save()
         """
         from pandas.io.excel import ExcelWriter
+
         need_save = False
         if encoding == None:
             encoding = 'ascii'
@@ -1212,7 +1218,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
 
         formatter = fmt.ExcelFormatter(self,
                                        na_rep=na_rep,
-                                       cols=cols,
+                                       cols=columns,
                                        header=header,
                                        float_format=float_format,
                                        index=index,
@@ -2439,27 +2445,28 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
         else:
             return result
 
-    def drop_duplicates(self, cols=None, take_last=False, inplace=False):
+    @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset')
+    def drop_duplicates(self, subset=None, take_last=False, inplace=False):
         """
         Return DataFrame with duplicate rows removed, optionally only
         considering certain columns
 
         Parameters
         ----------
-        cols : column label or sequence of labels, optional
+        subset : column label or sequence of labels, optional
             Only consider certain columns for identifying duplicates, by
             default use all of the columns
         take_last : boolean, default False
             Take the last observed row in a row. Defaults to the first row
         inplace : boolean, default False
             Whether to drop duplicates in place or to return a copy
+        cols : kwarg only alias of subset [deprecated]
 
         Returns
         -------
         deduplicated : DataFrame
         """
-
-        duplicated = self.duplicated(cols, take_last=take_last)
+        duplicated = self.duplicated(subset, take_last=take_last)
 
         if inplace:
             inds, = (-duplicated).nonzero()
@@ -2468,18 +2475,20 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False):
         else:
             return self[-duplicated]
 
-    def duplicated(self, cols=None, take_last=False):
+    @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset')
+    def duplicated(self, subset=None, take_last=False):
         """
         Return boolean Series denoting duplicate rows, optionally only
         considering certain columns
 
         Parameters
         ----------
-        cols : column label or sequence of labels, optional
+        subset : column label or sequence of labels, optional
             Only consider certain columns for identifying duplicates, by
             default use all of the columns
         take_last : boolean, default False
             Take the last observed row in a row. Defaults to the first row
+        cols : kwarg only alias of subset [deprecated]
 
         Returns
         -------
@@ -2491,19 +2500,19 @@ def _m8_to_i8(x):
                 return x.view(np.int64)
             return x
 
-        if cols is None:
+        if subset is None:
             values = list(_m8_to_i8(self.values.T))
         else:
-            if np.iterable(cols) and not isinstance(cols, compat.string_types):
-                if isinstance(cols, tuple):
-                    if cols in self.columns:
-                        values = [self[cols].values]
+            if np.iterable(subset) and not isinstance(subset, compat.string_types):
+                if isinstance(subset, tuple):
+                    if subset in self.columns:
+                        values = [self[subset].values]
                     else:
-                        values = [_m8_to_i8(self[x].values) for x in cols]
+                        values = [_m8_to_i8(self[x].values) for x in subset]
                 else:
-                    values = [_m8_to_i8(self[x].values) for x in cols]
+                    values = [_m8_to_i8(self[x].values) for x in subset]
             else:
-                values = [self[cols].values]
+                values = [self[subset].values]
 
         keys = lib.fast_zip_fillna(values)
         duplicated = lib.duplicated(keys, take_last=take_last)