diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fac5c211cdad8..f0772f72d63d4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,6 +122,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG + ! grep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for modules that pandas should not import' ; echo $MSG python -c " import sys diff --git a/doc/source/io.rst b/doc/source/io.rst index beb1c1daba962..34dc185c200e6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2854,6 +2854,11 @@ It is often the case that users will insert columns to do temporary computations in Excel and you may not want to read in those columns. ``read_excel`` takes a ``usecols`` keyword to allow you to specify a subset of columns to parse. +.. deprecated:: 0.24.0 + +Passing in an integer for ``usecols`` has been deprecated. Please pass in a list +of ints from 0 to ``usecols`` inclusive instead. + If ``usecols`` is an integer, then it is assumed to indicate the last column to be parsed. diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 34921505a46bf..2445daebb580a 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -266,7 +266,7 @@ These changes conform sparse handling to return the correct types and work to ma ``SparseArray.take`` now returns a scalar for scalar input, ``SparseArray`` for others. Furthermore, it handles a negative indexer with the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) -.. ipython:: python +.. code-block:: python s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) s.take(0) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 73fd526640212..20496c9fb3f31 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -970,6 +970,7 @@ Deprecations - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) +- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) .. 
_whatsnew_0240.deprecations.datetimelike_int_ops: @@ -1298,6 +1299,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` and func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) +- Bug in :func:`read_csv()` in which memory management was prematurely optimized for the C engine when the data was being read in chunks (:issue:`23509`) - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 3870a55c22fd6..40aa03caa56eb 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -132,6 +132,7 @@ cdef extern from "parser/tokenizer.h": int64_t *word_starts # where we are in the stream int64_t words_len int64_t words_cap + int64_t max_words_cap # maximum word cap encountered char *pword_start # pointer to stream start of current field int64_t word_start # position start of current field diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 2fce241027d56..e46e1e85f1c81 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -197,6 +197,7 @@ int parser_init(parser_t *self) { sz = sz ? sz : 1; self->words = (char **)malloc(sz * sizeof(char *)); self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t)); + self->max_words_cap = sz; self->words_cap = sz; self->words_len = 0; @@ -247,7 +248,7 @@ void parser_del(parser_t *self) { } static int make_stream_space(parser_t *self, size_t nbytes) { - int64_t i, cap; + int64_t i, cap, length; int status; void *orig_ptr, *newptr; @@ -287,8 +288,23 @@ static int make_stream_space(parser_t *self, size_t nbytes) { */ cap = self->words_cap; + + /** + * If we are reading in chunks, we need to be aware of the maximum number + * of words we have seen in previous chunks (self->max_words_cap), so + * that we can allocate properly when reading subsequent ones. + * + * Otherwise, we risk a buffer overflow if we mistakenly under-allocate + * just because a recent chunk did not have as many words. + */ + if (self->words_len + nbytes < self->max_words_cap) { + length = self->max_words_cap - nbytes; + } else { + length = self->words_len; + } + self->words = - (char **)grow_buffer((void *)self->words, self->words_len, + (char **)grow_buffer((void *)self->words, length, (int64_t*)&self->words_cap, nbytes, sizeof(char *), &status); TRACE( @@ -1241,6 +1257,19 @@ int parser_trim_buffers(parser_t *self) { int64_t i; + /** + * Before we free up space and trim, we should + * save how many words we saw when parsing, if + * it exceeds the maximum number we saw before. + * + * This is important for when we read in chunks, + * so that we can inform subsequent chunk parsing + * as to how many words we could possibly see.
+ */ + if (self->words_cap > self->max_words_cap) { + self->max_words_cap = self->words_cap; + } + /* trim words, word_starts */ new_cap = _next_pow2(self->words_len) + 1; if (new_cap < self->words_cap) { diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 9fc3593aaaf5b..c32c061c7fa89 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -142,6 +142,7 @@ typedef struct parser_t { int64_t *word_starts; // where we are in the stream int64_t words_len; int64_t words_cap; + int64_t max_words_cap; // maximum word cap encountered char *pword_start; // pointer to stream start of current field int64_t word_start; // position start of current field diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3fa4f503d2dd5..daf2dcccd284b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -124,8 +124,12 @@ def asi8(self): # do not cache or you'll create a memory leak return self._data.view('i8') - # ------------------------------------------------------------------ - # Array-like Methods + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + @property + def nbytes(self): + return self._data.nbytes @property def shape(self): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b0485cc82f07f..a6f688fb0cf7a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -385,7 +385,7 @@ def _resolution(self): return libresolution.resolution(self.asi8, self.tz) # ---------------------------------------------------------------- - # Array-like Methods + # Array-Like / EA-Interface Methods def __array__(self, dtype=None): if is_object_dtype(dtype): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 482968fdb4766..b343d42ef3b7c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -272,10 +272,6 @@ def _concat_same_type(cls, to_concat): # -------------------------------------------------------------------- # Data / Attributes - @property - def nbytes(self): - # TODO(DatetimeArray): remove - return self._data.nbytes @cache_readonly def dtype(self): @@ -286,10 +282,6 @@ def _ndarray_values(self): # Ordinals return self._data - @property - def asi8(self): - return self._data - @property def freq(self): """Return the frequency object for this PeriodArray.""" @@ -330,6 +322,50 @@ def start_time(self): def end_time(self): return self.to_timestamp(how='end') + def to_timestamp(self, freq=None, how='start'): + """ + Cast to DatetimeArray/Index. + + Parameters + ---------- + freq : string or DateOffset, optional + Target frequency. 
The default is 'D' for week or longer, + 'S' otherwise + how : {'s', 'e', 'start', 'end'} + + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays import DatetimeArrayMixin + + how = libperiod._validate_end_alias(how) + + end = how == 'E' + if end: + if freq == 'B': + # roll forward to ensure we land on B date + adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') + return self.to_timestamp(how='start') + adjust + else: + adjust = Timedelta(1, 'ns') + return (self + self.freq).to_timestamp(how='start') - adjust + + if freq is None: + base, mult = frequencies.get_freq_code(self.freq) + freq = frequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) + + base, mult = frequencies.get_freq_code(freq) + new_data = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) + return DatetimeArrayMixin(new_data, freq='infer') + + # -------------------------------------------------------------------- + # Array-like / EA-Interface Methods + def __repr__(self): return '<{}>\n{}\nLength: {}, dtype: {}'.format( self.__class__.__name__, @@ -456,6 +492,8 @@ def value_counts(self, dropna=False): name=result.index.name) return Series(result.values, index=index, name=result.name) + # -------------------------------------------------------------------- + def shift(self, periods=1): """ Shift values by desired number. @@ -567,49 +605,9 @@ def asfreq(self, freq=None, how='E'): return type(self)(new_data, freq=freq) - def to_timestamp(self, freq=None, how='start'): - """ - Cast to DatetimeArray/Index - - Parameters - ---------- - freq : string or DateOffset, optional - Target frequency. The default is 'D' for week or longer, - 'S' otherwise - how : {'s', 'e', 'start', 'end'} - - Returns - ------- - DatetimeArray/Index - """ - from pandas.core.arrays import DatetimeArrayMixin - - how = libperiod._validate_end_alias(how) - - end = how == 'E' - if end: - if freq == 'B': - # roll forward to ensure we land on B date - adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') - return self.to_timestamp(how='start') + adjust - else: - adjust = Timedelta(1, 'ns') - return (self + self.freq).to_timestamp(how='start') - adjust - - if freq is None: - base, mult = frequencies.get_freq_code(self.freq) - freq = frequencies.get_to_timestamp_base(base) - else: - freq = Period._maybe_convert_freq(freq) - - base, mult = frequencies.get_freq_code(freq) - new_data = self.asfreq(freq, how=how) - - new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) - return DatetimeArrayMixin(new_data, freq='infer') - # ------------------------------------------------------------------ # Formatting + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): """ actually format my specific types """ # TODO(DatetimeArray): remove @@ -630,9 +628,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): values = np.array([formatter(dt) for dt in values]) return values + # Delegation... + def strftime(self, date_format): + return self._format_native_types(date_format=date_format) + def repeat(self, repeats, *args, **kwargs): """ - Repeat elements of a Categorical. + Repeat elements of a PeriodArray. See also -------- @@ -643,10 +645,6 @@ def repeat(self, repeats, *args, **kwargs): values = self._data.repeat(repeats) return type(self)(values, self.freq) - # Delegation... 
- def strftime(self, date_format): - return self._format_native_types(date_format=date_format) - def astype(self, dtype, copy=True): # TODO: Figure out something better here... # We have DatetimeLikeArrayMixin -> diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index a63b3fb53625f..672261c2a407e 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -284,6 +284,83 @@ def is_dtype(cls, dtype): return True return isinstance(dtype, np.dtype) or dtype == 'Sparse' + def update_dtype(self, dtype): + """Convert the SparseDtype to a new dtype. + + This takes care of converting the ``fill_value``. + + Parameters + ---------- + dtype : Union[str, numpy.dtype, SparseDtype] + The new dtype to use. + + * For a SparseDtype, it is simply returned + * For a NumPy dtype (or str), the current fill value + is converted to the new dtype, and a SparseDtype + with `dtype` and the new fill value is returned. + + Returns + ------- + SparseDtype + A new SparseDtype with the correct `dtype` and fill value + for that `dtype`. + + Raises + ------ + ValueError + When the current fill value cannot be converted to the + new `dtype` (e.g. trying to convert ``np.nan`` to an + integer dtype). + + + Examples + -------- + >>> SparseDtype(int, 0).update_dtype(float) + Sparse[float64, 0.0] + + >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) + Sparse[float64, nan] + """ + cls = type(self) + dtype = pandas_dtype(dtype) + + if not isinstance(dtype, cls): + fill_value = astype_nansafe(np.array(self.fill_value), + dtype).item() + dtype = cls(dtype, fill_value=fill_value) + + return dtype + + @property + def _subtype_with_str(self): + """ + Whether the SparseDtype's subtype should be considered ``str``. + + Typically, pandas will store string data in an object-dtype array. + When converting values to a dtype, e.g. in ``.astype``, we need to + be more specific: we need the actual underlying type. + + Examples + -------- + + >>> SparseDtype(int, 1)._subtype_with_str + dtype('int64') + + >>> SparseDtype(object, 1)._subtype_with_str + dtype('O') + + >>> dtype = SparseDtype(str, '') + >>> dtype.subtype + dtype('O') + + >>> dtype._subtype_with_str + str + """ + if isinstance(self.fill_value, compat.string_types): + return type(self.fill_value) + return self.subtype + + # ---------------------------------------------------------------------------- # Array @@ -614,7 +691,7 @@ def __array__(self, dtype=None, copy=True): # Can't put pd.NaT in a datetime64[ns] fill_value = np.datetime64('NaT') try: - dtype = np.result_type(self.sp_values.dtype, fill_value) + dtype = np.result_type(self.sp_values.dtype, type(fill_value)) except TypeError: dtype = object @@ -996,7 +1073,7 @@ def _take_with_fill(self, indices, fill_value=None): if len(self) == 0: # Empty...
Allow taking only if all empty if (indices == -1).all(): - dtype = np.result_type(self.sp_values, fill_value) + dtype = np.result_type(self.sp_values, type(fill_value)) taken = np.empty_like(indices, dtype=dtype) taken.fill(fill_value) return taken @@ -1009,7 +1086,7 @@ def _take_with_fill(self, indices, fill_value=None): if self.sp_index.npoints == 0: # Avoid taking from the empty self.sp_values taken = np.full(sp_indexer.shape, fill_value=fill_value, - dtype=np.result_type(fill_value)) + dtype=np.result_type(type(fill_value))) else: taken = self.sp_values.take(sp_indexer) @@ -1030,12 +1107,13 @@ def _take_with_fill(self, indices, fill_value=None): result_type = taken.dtype if m0.any(): - result_type = np.result_type(result_type, self.fill_value) + result_type = np.result_type(result_type, + type(self.fill_value)) taken = taken.astype(result_type) taken[old_fill_indices] = self.fill_value if m1.any(): - result_type = np.result_type(result_type, fill_value) + result_type = np.result_type(result_type, type(fill_value)) taken = taken.astype(result_type) taken[new_fill_indices] = fill_value @@ -1061,7 +1139,7 @@ def _take_without_fill(self, indices): # edge case in take... # I think just return out = np.full(indices.shape, self.fill_value, - dtype=np.result_type(self.fill_value)) + dtype=np.result_type(type(self.fill_value))) arr, sp_index, fill_value = make_sparse(out, fill_value=self.fill_value) return type(self)(arr, sparse_index=sp_index, @@ -1073,7 +1151,7 @@ def _take_without_fill(self, indices): if fillable.any(): # TODO: may need to coerce array to fill value - result_type = np.result_type(taken, self.fill_value) + result_type = np.result_type(taken, type(self.fill_value)) taken = taken.astype(result_type) taken[fillable] = self.fill_value @@ -1093,7 +1171,9 @@ def _concat_same_type(cls, to_concat): fill_value = fill_values[0] - if len(set(fill_values)) > 1: + # np.nan isn't a singleton, so we may end up with multiple + # NaNs here, so we ignore the all-NA case too. + if not (len(set(fill_values)) == 1 or isna(fill_values).all()): warnings.warn("Concatenating sparse arrays with multiple fill " "values: '{}'. Picking the first and " "converting the rest.".format(fill_values), @@ -1212,13 +1292,10 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ - dtype = pandas_dtype(dtype) - - if not isinstance(dtype, SparseDtype): - dtype = SparseDtype(dtype, fill_value=self.fill_value) - + dtype = self.dtype.update_dtype(dtype) + subtype = dtype._subtype_with_str sp_values = astype_nansafe(self.sp_values, - dtype.subtype, + subtype, copy=copy) if sp_values is self.sp_values and copy: sp_values = sp_values.copy() diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1f78e0c00bf00..9dbdd6ff8b562 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -190,6 +190,9 @@ def _generate_range(cls, start, end, periods, freq, closed=None): return cls._simple_new(index, freq=freq) + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + # ---------------------------------------------------------------- # Arithmetic Methods @@ -412,20 +415,25 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): array : list-like copy : bool, default False unit : str, default "ns" + The timedelta unit to treat integers as multiples of.
errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. Returns ------- - ndarray[timedelta64[ns]] + converted : numpy.ndarray + The sequence converted to a numpy array with dtype ``timedelta64[ns]``. inferred_freq : Tick or None + The inferred frequency of the sequence. Raises ------ - ValueError : data cannot be converted to timedelta64[ns] + ValueError : Data cannot be converted to timedelta64[ns]. Notes ----- - Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + Unlike `pandas.to_timedelta`, if setting ``errors=ignore`` will not cause errors to be ignored; they are caught and subsequently ignored at a higher level. """ @@ -497,12 +505,13 @@ def ints_to_td64ns(data, unit="ns"): Parameters ---------- - data : np.ndarray with integer-dtype + data : numpy.ndarray with integer-dtype unit : str, default "ns" + The timedelta unit to treat integers as multiples of. Returns ------- - ndarray[timedelta64[ns]] + numpy.ndarray : timedelta64[ns] array converted from data bool : whether a copy was made """ copy_made = False @@ -538,15 +547,18 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): ---------- data : ndarray or Index unit : str, default "ns" + The timedelta unit to treat integers as multiples of. errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. Returns ------- - ndarray[timedelta64[ns]] + numpy.ndarray : timedelta64[ns] array converted from data Raises ------ - ValueError : data cannot be converted to timedelta64[ns] + ValueError : Data cannot be converted to timedelta64[ns]. Notes ----- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 34f25c5634d5b..2c7f6ae8e3533 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11,6 +11,8 @@ import pandas as pd from pandas._libs import properties, Timestamp, iNaT +from pandas.errors import AbstractMethodError + from pandas.core.dtypes.common import ( ensure_int64, ensure_object, @@ -200,7 +202,7 @@ def _constructor(self): """Used when a manipulation result has the same dimensions as the original. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def __unicode__(self): # unicode representation based upon iterating over self @@ -221,7 +223,7 @@ def _constructor_sliced(self): """Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @property def _constructor_expanddim(self): @@ -2884,7 +2886,7 @@ def _iget_item_cache(self, item): return lower def _box_item_values(self, key, values): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 451f1199ac8e6..b0477c7d3a8ad 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -18,6 +18,7 @@ import pandas.compat as compat from pandas.compat import lzip, map from pandas.compat.numpy import _np_version_under1p13 +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -240,7 +241,7 @@ def _aggregate_generic(self, func, *args, **kwargs): return self._wrap_generic_output(result, obj) def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 @@ -1659,4 +1660,4 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): raise ValueError("axis value must be greater than 0") def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ea7507799fa9a..12327e1cf148e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -20,6 +20,7 @@ class providing the base-class of operations. import pandas.compat as compat from pandas.compat import callable, range, set_function_name, zip from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._validators import validate_kwargs @@ -706,7 +707,7 @@ def _iterate_slices(self): yield self._selection_name, self._selected_obj def transform(self, func, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _cumcount_array(self, ascending=True): """ @@ -861,7 +862,7 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _wrap_applied_output(self, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 390334a89cbfe..125bd9a5e855d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -13,6 +13,7 @@ from pandas._libs import NaT, groupby as libgroupby, iNaT, lib, reduction from pandas.compat import lzip, range, zip +from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -841,7 +842,7 @@ def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] def apply(self, f): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesSplitter(DataSplitter): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 59429488a7c2f..4547f47314bad 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -4,44 +4,33 @@ """ import warnings -from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.core.tools.timedeltas import to_timedelta - import numpy as np -from pandas._libs import lib, iNaT, NaT -from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo +from pandas._libs import NaT, iNaT, lib +from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 +import 
pandas.compat as compat +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - ensure_int64, - is_dtype_equal, - is_float, - is_integer, - is_list_like, - is_scalar, - is_bool_dtype, - is_period_dtype, - is_categorical_dtype, - is_datetime_or_timedelta_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, - is_string_dtype) -from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, ABCIndexClass) + ensure_int64, is_bool_dtype, is_categorical_dtype, + is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, + is_integer, is_integer_dtype, is_list_like, is_object_dtype, + is_period_dtype, is_scalar, is_string_dtype) +import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna -from pandas.core import common as com, algorithms, ops - -import pandas.io.formats.printing as printing +from pandas.core import algorithms, ops from pandas.core.arrays import PeriodArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.util._decorators import Appender, cache_readonly -import pandas.core.dtypes.concat as _concat +from pandas.core.tools.timedeltas import to_timedelta + +import pandas.io.formats.printing as printing -import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -543,7 +532,7 @@ def argmax(self, axis=None, *args, **kwargs): @property def _formatter_func(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _format_attrs(self): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c82cff19573e3..b754b2705d034 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1,57 +1,45 @@ # pylint: disable=E1101 from __future__ import division + +from datetime import datetime, time, timedelta import operator import warnings -from datetime import time, datetime, timedelta import numpy as np from pytz import utc -from pandas.core.base import _shared_docs +from pandas._libs import ( + Timestamp, index as libindex, join as libjoin, lib, tslib as libts) +from pandas._libs.tslibs import ( + ccalendar, conversion, fields, parsing, timezones) +import pandas.compat as compat +from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _INT64_DTYPE, - _NS_DTYPE, - is_datetime64_dtype, - is_datetimetz, - is_dtype_equal, - is_integer, - is_float, - is_integer_dtype, - is_datetime64_ns_dtype, - is_period_dtype, - is_string_like, - is_list_like, - is_scalar, - pandas_dtype, - ensure_int64) + _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, + is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, + is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, + is_string_like, pandas_dtype) +import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna -import pandas.core.dtypes.concat as _concat -from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 from pandas.core.arrays import datetimelike as dtl - +from pandas.core.arrays.datetimes import ( + DatetimeArrayMixin as DatetimeArray, _to_m8) +from 
pandas.core.base import _shared_docs +import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.datetimelike import ( + DatelikeOps, DatetimeIndexOpsMixin, TimelikeOps, wrap_array_method, + wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name -import pandas.compat as compat -from pandas.tseries.frequencies import to_offset, Resolution -from pandas.core.indexes.datetimelike import ( - DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, - wrap_field_accessor, wrap_array_method) -from pandas.tseries.offsets import ( - CDay, prefix_mapping) - -from pandas.util._decorators import Appender, cache_readonly, Substitution -import pandas.core.common as com -import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools -from pandas._libs import (lib, index as libindex, tslib as libts, - join as libjoin, Timestamp) -from pandas._libs.tslibs import (timezones, conversion, fields, parsing, - ccalendar) +from pandas.tseries import offsets +from pandas.tseries.frequencies import Resolution, to_offset +from pandas.tseries.offsets import CDay, prefix_mapping def _new_DatetimeIndex(cls, d): @@ -68,7 +56,7 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, +class DatetimeIndex(DatetimeArray, DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented internally as int64, and @@ -182,8 +170,6 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, pandas.to_datetime : Convert argument to datetime """ - _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) - _typ = 'datetimeindex' _join_precedence = 10 @@ -227,8 +213,6 @@ def _join_i8_wrapper(joinf, **kwargs): _is_numeric_dtype = False _infer_as_myclass = True - _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) - is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) # -------------------------------------------------------------------- # Constructors @@ -268,8 +252,7 @@ def __new__(cls, data=None, # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, - DatetimeArrayMixin)): + if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) @@ -283,7 +266,7 @@ def __new__(cls, data=None, data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArrayMixin): + if isinstance(data, DatetimeArray): if tz is None: tz = data.tz elif data.tz is None: @@ -1125,43 +1108,47 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray - year = wrap_field_accessor(DatetimeArrayMixin.year) - month = wrap_field_accessor(DatetimeArrayMixin.month) - day = wrap_field_accessor(DatetimeArrayMixin.day) - hour = wrap_field_accessor(DatetimeArrayMixin.hour) - minute = wrap_field_accessor(DatetimeArrayMixin.minute) - second = wrap_field_accessor(DatetimeArrayMixin.second) - microsecond = wrap_field_accessor(DatetimeArrayMixin.microsecond) - nanosecond = wrap_field_accessor(DatetimeArrayMixin.nanosecond) - weekofyear = wrap_field_accessor(DatetimeArrayMixin.weekofyear) + _timezone = cache_readonly(DatetimeArray._timezone.fget) + 
is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) + _resolution = cache_readonly(DatetimeArray._resolution.fget) + + year = wrap_field_accessor(DatetimeArray.year) + month = wrap_field_accessor(DatetimeArray.month) + day = wrap_field_accessor(DatetimeArray.day) + hour = wrap_field_accessor(DatetimeArray.hour) + minute = wrap_field_accessor(DatetimeArray.minute) + second = wrap_field_accessor(DatetimeArray.second) + microsecond = wrap_field_accessor(DatetimeArray.microsecond) + nanosecond = wrap_field_accessor(DatetimeArray.nanosecond) + weekofyear = wrap_field_accessor(DatetimeArray.weekofyear) week = weekofyear - dayofweek = wrap_field_accessor(DatetimeArrayMixin.dayofweek) + dayofweek = wrap_field_accessor(DatetimeArray.dayofweek) weekday = dayofweek - weekday_name = wrap_field_accessor(DatetimeArrayMixin.weekday_name) + weekday_name = wrap_field_accessor(DatetimeArray.weekday_name) - dayofyear = wrap_field_accessor(DatetimeArrayMixin.dayofyear) - quarter = wrap_field_accessor(DatetimeArrayMixin.quarter) - days_in_month = wrap_field_accessor(DatetimeArrayMixin.days_in_month) + dayofyear = wrap_field_accessor(DatetimeArray.dayofyear) + quarter = wrap_field_accessor(DatetimeArray.quarter) + days_in_month = wrap_field_accessor(DatetimeArray.days_in_month) daysinmonth = days_in_month - is_month_start = wrap_field_accessor(DatetimeArrayMixin.is_month_start) - is_month_end = wrap_field_accessor(DatetimeArrayMixin.is_month_end) - is_quarter_start = wrap_field_accessor(DatetimeArrayMixin.is_quarter_start) - is_quarter_end = wrap_field_accessor(DatetimeArrayMixin.is_quarter_end) - is_year_start = wrap_field_accessor(DatetimeArrayMixin.is_year_start) - is_year_end = wrap_field_accessor(DatetimeArrayMixin.is_year_end) - is_leap_year = wrap_field_accessor(DatetimeArrayMixin.is_leap_year) - - tz_localize = wrap_array_method(DatetimeArrayMixin.tz_localize, True) - tz_convert = wrap_array_method(DatetimeArrayMixin.tz_convert, True) - to_perioddelta = wrap_array_method(DatetimeArrayMixin.to_perioddelta, + is_month_start = wrap_field_accessor(DatetimeArray.is_month_start) + is_month_end = wrap_field_accessor(DatetimeArray.is_month_end) + is_quarter_start = wrap_field_accessor(DatetimeArray.is_quarter_start) + is_quarter_end = wrap_field_accessor(DatetimeArray.is_quarter_end) + is_year_start = wrap_field_accessor(DatetimeArray.is_year_start) + is_year_end = wrap_field_accessor(DatetimeArray.is_year_end) + is_leap_year = wrap_field_accessor(DatetimeArray.is_leap_year) + + tz_localize = wrap_array_method(DatetimeArray.tz_localize, True) + tz_convert = wrap_array_method(DatetimeArray.tz_convert, True) + to_perioddelta = wrap_array_method(DatetimeArray.to_perioddelta, False) - to_period = wrap_array_method(DatetimeArrayMixin.to_period, True) - normalize = wrap_array_method(DatetimeArrayMixin.normalize, True) - to_julian_date = wrap_array_method(DatetimeArrayMixin.to_julian_date, + to_period = wrap_array_method(DatetimeArray.to_period, True) + normalize = wrap_array_method(DatetimeArray.normalize, True) + to_julian_date = wrap_array_method(DatetimeArray.to_julian_date, False) - month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) - day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) + month_name = wrap_array_method(DatetimeArray.month_name, True) + day_name = wrap_array_method(DatetimeArray.day_name, True) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 
128068959ebd3..7890f03a1eba7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -256,8 +256,12 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._reset_identity() return result + # ------------------------------------------------------------------------ + # Wrapping PeriodArray + # ------------------------------------------------------------------------ # Data + @property def _ndarray_values(self): return self._data._ndarray_values @@ -361,13 +365,6 @@ def asfreq(self, freq=None, how='E'): result = self._data.asfreq(freq=freq, how=how) return self._simple_new(result, name=self.name) - def _nat_new(self, box=True): - # TODO(DatetimeArray): remove this - result = self._data._nat_new(box=box) - if box: - result = self._simple_new(result, name=self.name) - return result - def to_timestamp(self, freq=None, how='start'): from pandas import DatetimeIndex result = self._data.to_timestamp(freq=freq, how=how) @@ -425,6 +422,7 @@ def _maybe_convert_timedelta(self, other): # ------------------------------------------------------------------------ # Indexing + @cache_readonly def _engine(self): return self._engine_type(lambda: self, len(self)) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 35e17c7400892..d9625d38b85de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -2,44 +2,36 @@ from datetime import datetime import numpy as np + +from pandas._libs import ( + NaT, Timedelta, index as libindex, join as libjoin, lib) +import pandas.compat as compat +from pandas.util._decorators import Appender, Substitution + from pandas.core.dtypes.common import ( - _TD_DTYPE, - is_integer, - is_float, - is_list_like, - is_scalar, - is_timedelta64_dtype, - is_timedelta64_ns_dtype, - pandas_dtype, - ensure_int64) + _TD_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar, + is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) +import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna +from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.timedeltas import ( - TimedeltaArrayMixin, _is_convertible_to_td, _to_m8, + TimedeltaArrayMixin as TimedeltaArray, _is_convertible_to_td, _to_m8, sequence_to_td64ns) -from pandas.core.arrays import datetimelike as dtl - -from pandas.core.indexes.base import Index -from pandas.core.indexes.numeric import Int64Index -import pandas.compat as compat - -from pandas.tseries.frequencies import to_offset from pandas.core.base import _shared_docs -from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com -from pandas.core.ops import get_op_result_name -import pandas.core.dtypes.concat as _concat -from pandas.util._decorators import Appender, Substitution +from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( - TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op, - wrap_array_method, wrap_field_accessor) -from pandas.core.tools.timedeltas import ( - _coerce_scalar_to_timedelta_type) -from pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT) + DatetimeIndexOpsMixin, TimelikeOps, wrap_arithmetic_op, wrap_array_method, + wrap_field_accessor) +from pandas.core.indexes.numeric import Int64Index +from pandas.core.ops import get_op_result_name +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type + +from pandas.tseries.frequencies import to_offset -class 
TimedeltaIndex(TimedeltaArrayMixin, +class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, TimelikeOps, Int64Index): """ Immutable ndarray of timedelta64 data, represented internally as int64, and @@ -223,8 +215,7 @@ def _maybe_update_attributes(self, attrs): return attrs def _evaluate_with_timedelta_like(self, other, op): - result = TimedeltaArrayMixin._evaluate_with_timedelta_like(self, other, - op) + result = TimedeltaArray._evaluate_with_timedelta_like(self, other, op) return wrap_arithmetic_op(self, other, result) def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): @@ -236,12 +227,12 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray - days = wrap_field_accessor(TimedeltaArrayMixin.days) - seconds = wrap_field_accessor(TimedeltaArrayMixin.seconds) - microseconds = wrap_field_accessor(TimedeltaArrayMixin.microseconds) - nanoseconds = wrap_field_accessor(TimedeltaArrayMixin.nanoseconds) + days = wrap_field_accessor(TimedeltaArray.days) + seconds = wrap_field_accessor(TimedeltaArray.seconds) + microseconds = wrap_field_accessor(TimedeltaArray.microseconds) + nanoseconds = wrap_field_accessor(TimedeltaArray.nanoseconds) - total_seconds = wrap_array_method(TimedeltaArrayMixin.total_seconds, True) + total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True) # ------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 155cf566b4c40..3a67238a66450 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,12 +10,11 @@ import pandas.compat as compat from pandas.compat import BytesIO, StringIO, string_types, text_type from pandas.errors import ( # noqa - DtypeWarning, EmptyDataError, ParserError, ParserWarning) + AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, + ParserWarning) from pandas.core.dtypes.common import is_file_like, is_number -import pandas.core.common as com - from pandas.io.formats.printing import pprint_thing # gh-12665: Alias for now and remove later. @@ -67,7 +66,7 @@ def __iter__(self): return self def __next__(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) if not compat.PY3: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 2e93c237bb7ea..c25a7670cce44 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -95,6 +95,10 @@ usecols : int, str, list-like, or callable default None * If None, then parse all columns, * If int, then indicates last column to be parsed + + .. deprecated:: 0.24.0 + Pass in a list of ints from 0 to `usecols` inclusive instead. + * If string, then indicates comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of both sides. @@ -778,6 +782,10 @@ def _maybe_convert_usecols(usecols): return usecols if is_integer(usecols): + warnings.warn(("Passing in an integer for `usecols` has been " + "deprecated. 
Please pass in a list of ints from " + "0 to `usecols` inclusive instead."), + FutureWarning, stacklevel=2) return lrange(usecols + 1) if isinstance(usecols, compat.string_types): diff --git a/pandas/io/html.py b/pandas/io/html.py index bcbb07c6dddfb..c967bdd29df1f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,12 +12,11 @@ from pandas.compat import ( binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types, u) -from pandas.errors import EmptyDataError +from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like from pandas import Series -import pandas.core.common as com from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing @@ -256,7 +255,7 @@ def _text_getter(self, obj): text : str or unicode The text from an individual DOM node. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_td(self, obj): """Return the td elements from a row element. @@ -271,7 +270,7 @@ def _parse_td(self, obj): list of node-like These are the elements of each row, i.e., the columns. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tr(self, table): """ @@ -286,7 +285,7 @@ def _parse_thead_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tbody_tr(self, table): """ @@ -305,7 +304,7 @@ def _parse_tbody_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tfoot_tr(self, table): """ @@ -320,7 +319,7 @@ def _parse_tfoot_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tables(self, doc, match, attrs): """ @@ -346,7 +345,7 @@ def _parse_tables(self, doc, match, attrs): list of node-like HTML elements to be parsed into raw data. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _equals_tag(self, obj, tag): """ @@ -365,7 +364,7 @@ def _equals_tag(self, obj, tag): boolean Whether `obj`'s tag name is `tag` """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _build_doc(self): """ @@ -376,7 +375,7 @@ def _build_doc(self): node-like The DOM from which to parse the table element. 
""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tbody_tfoot(self, table_html): """ diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index af7b390de213d..4453416a97f89 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -7,11 +7,11 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT from pandas.compat import StringIO, long, to_str, u +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_period_dtype from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime -import pandas.core.common as com from pandas.core.reshape.concat import concat from pandas.io.common import ( @@ -97,7 +97,7 @@ def __init__(self, obj, orient, date_format, double_precision, self._format_axes() def _format_axes(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def write(self): return self._write(self.obj, self.orient, self.double_precision, @@ -658,7 +658,7 @@ def _convert_axes(self): setattr(self.obj, axis, new_axis) def _try_convert_types(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): @@ -771,7 +771,7 @@ def _try_convert_to_date(self, data): return data, False def _try_convert_dates(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesParser(Parser): diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3d72b1ec3a47f..aad59f9805a3b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -4,9 +4,9 @@ from warnings import catch_warnings from pandas.compat import string_types +from pandas.errors import AbstractMethodError from pandas import DataFrame, get_option -import pandas.core.common as com from pandas.io.common import get_filepath_or_buffer, is_s3_url @@ -67,10 +67,10 @@ def validate_dataframe(df): raise ValueError("Index level names must be strings") def write(self, df, path, compression, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class PyArrowImpl(BaseImpl): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index cb2ee64100728..75b2d0366b06a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,7 +21,8 @@ import pandas.compat as compat from pandas.compat import ( PY3, StringIO, lrange, lzip, map, range, string_types, u, zip) -from pandas.errors import EmptyDataError, ParserError, ParserWarning +from pandas.errors import ( + AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender from pandas.core.dtypes.cast import astype_nansafe @@ -34,7 +35,6 @@ from pandas.core import algorithms from pandas.core.arrays import Categorical -import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.index import ( Index, MultiIndex, RangeIndex, ensure_index_from_sequences) @@ -1064,7 +1064,7 @@ def _make_engine(self, engine='c'): self._engine = klass(self.f, **self.options) def _failover_to_python(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, nrows=None): nrows = _validate_integer('nrows', nrows) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 41e14e482d061..4c28e0f88b1ae 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1674,7 +1674,7 @@ def cvalues(self): 
def __iter__(self): return iter(self.values) - def maybe_set_size(self, min_itemsize=None, **kwargs): + def maybe_set_size(self, min_itemsize=None): """ maybe set a string col itemsize: min_itemsize can be an integer or a dict with this columns name with an integer size """ @@ -1687,13 +1687,13 @@ def maybe_set_size(self, min_itemsize=None, **kwargs): self.typ = _tables( ).StringCol(itemsize=min_itemsize, pos=self.pos) - def validate(self, handler, append, **kwargs): + def validate(self, handler, append): self.validate_names() def validate_names(self): pass - def validate_and_set(self, handler, append, **kwargs): + def validate_and_set(self, handler, append): self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -3772,7 +3772,7 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs): return Index(coords) - def read_column(self, column, where=None, start=None, stop=None, **kwargs): + def read_column(self, column, where=None, start=None, stop=None): """return a single column from the table, generally only indexables are interesting """ @@ -4727,7 +4727,7 @@ class Selection(object): """ - def __init__(self, table, where=None, start=None, stop=None, **kwargs): + def __init__(self, table, where=None, start=None, stop=None): self.table = table self.where = where self.start = start diff --git a/pandas/io/stata.py b/pandas/io/stata.py index df0d47b063411..66e996075f1ed 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -461,7 +461,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): excessive_string_length_error = """ Fixed width strings in Stata .dta files are limited to 244 (or fewer) -characters. Column '%s' does not satisfy this restriction. +characters. Column '%s' does not satisfy this restriction. Use the +'version=117' parameter to write the newer (Stata 13 and later) format. 
""" diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 405c534e8528b..1c70ece434abb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -12,6 +12,7 @@ from pandas.util._decorators import cache_readonly, Appender from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat +from pandas.errors import AbstractMethodError import pandas.core.common as com from pandas.core.base import PandasObject @@ -373,7 +374,7 @@ def _compute_plot_data(self): self.data = numeric_data def _make_plot(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_table(self): if self.table is False: diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index c15696705ab82..0e5a8280cc467 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -477,6 +477,34 @@ def test_astype_all(self, any_real_dtype): tm.assert_numpy_array_equal(np.asarray(res.values), vals.astype(typ)) + @pytest.mark.parametrize('array, dtype, expected', [ + (SparseArray([0, 1]), 'float', + SparseArray([0., 1.], dtype=SparseDtype(float, 0.0))), + (SparseArray([0, 1]), bool, SparseArray([False, True])), + (SparseArray([0, 1], fill_value=1), bool, + SparseArray([False, True], dtype=SparseDtype(bool, True))), + pytest.param( + SparseArray([0, 1]), 'datetime64[ns]', + SparseArray(np.array([0, 1], dtype='datetime64[ns]'), + dtype=SparseDtype('datetime64[ns]', + pd.Timestamp('1970'))), + marks=[pytest.mark.xfail(reason="NumPy-7619", strict=True)], + ), + (SparseArray([0, 1, 10]), str, + SparseArray(['0', '1', '10'], dtype=SparseDtype(str, '0'))), + (SparseArray(['10', '20']), float, SparseArray([10.0, 20.0])), + (SparseArray([0, 1, 0]), object, + SparseArray([0, 1, 0], dtype=SparseDtype(object, 0))), + ]) + def test_astype_more(self, array, dtype, expected): + result = array.astype(dtype) + tm.assert_sp_array_equal(result, expected) + + def test_astype_nan_raises(self): + arr = SparseArray([1.0, np.nan]) + with pytest.raises(ValueError, match='Cannot convert non-finite'): + arr.astype(int) + def test_set_fill_value(self): arr = SparseArray([1., np.nan, 2.], fill_value=np.nan) arr.fill_value = 2 diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 7c310693cf26c..2d386de0d31a3 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -139,3 +139,23 @@ def test_parse_subtype(string, expected): def test_construct_from_string_fill_value_raises(string): with pytest.raises(TypeError, match='fill_value in the string is not'): SparseDtype.construct_from_string(string) + + +@pytest.mark.parametrize('original, dtype, expected', [ + (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), + (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), + (SparseDtype(int, 1), str, SparseDtype(object, '1')), + (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), +]) +def test_update_dtype(original, dtype, expected): + result = original.update_dtype(dtype) + assert result == expected + + +@pytest.mark.parametrize("original, dtype", [ + (SparseDtype(float, np.nan), int), + (SparseDtype(str, 'abc'), int), +]) +def test_update_dtype_raises(original, dtype): + with pytest.raises(ValueError): + original.update_dtype(dtype) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 5ba99a48e34ad..bb4022c9cac9a 100644 --- 
a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -4,7 +4,8 @@ import pandas as pd from pandas.core.arrays import ( - DatetimeArrayMixin, PeriodArray, TimedeltaArrayMixin) + DatetimeArrayMixin as DatetimeArray, PeriodArray, + TimedeltaArrayMixin as TimedeltaArray) import pandas.util.testing as tm @@ -61,7 +62,7 @@ def test_array_object_dtype(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = np.array(list(dti)) @@ -76,7 +77,7 @@ def test_array(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.asi8.view('M8[ns]') result = np.array(arr) @@ -91,7 +92,7 @@ def test_array_i8_dtype(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.asi8 result = np.array(arr, dtype='i8') @@ -108,7 +109,7 @@ def test_array_i8_dtype(self, tz_naive_fixture): def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert list(dti) == list(arr) # Check that Index.__new__ knows what to do with DatetimeArray @@ -119,7 +120,7 @@ def test_from_dti(self, tz_naive_fixture): def test_astype_object(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' @@ -129,11 +130,11 @@ def test_astype_object(self, tz_naive_fixture): def test_to_perioddelta(self, datetime_index, freqstr): # GH#23113 dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_perioddelta(freq=freqstr) result = arr.to_perioddelta(freq=freqstr) - assert isinstance(result, TimedeltaArrayMixin) + assert isinstance(result, TimedeltaArray) # placeholder until these become actual EA subclasses and we can use # an EA-specific tm.assert_ function @@ -142,7 +143,7 @@ def test_to_perioddelta(self, datetime_index, freqstr): @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y']) def test_to_period(self, datetime_index, freqstr): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_period(freq=freqstr) result = arr.to_period(freq=freqstr) @@ -156,7 +157,7 @@ def test_to_period(self, datetime_index, freqstr): def test_bool_properties(self, datetime_index, propname): # in this case _bool_ops is just `is_leap_year` dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert dti.freq == arr.freq result = getattr(arr, propname) @@ -167,7 +168,7 @@ def test_bool_properties(self, datetime_index, propname): @pytest.mark.parametrize('propname', pd.DatetimeIndex._field_ops) def test_int_properties(self, datetime_index, propname): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) result = getattr(arr, propname) expected = np.array(getattr(dti, propname), dtype=result.dtype) @@ -178,7 +179,7 @@ def test_int_properties(self, datetime_index, propname): class TestTimedeltaArray(object): def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) - arr = TimedeltaArrayMixin(tdi) + arr = 
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index a15295cfbd81a..2b630b98b69a2 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Tests for DatetimeArray
 """
diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py
index 95a1d1781456c..63b34db13705e 100644
--- a/pandas/tests/arrays/test_period.py
+++ b/pandas/tests/arrays/test_period.py
@@ -190,7 +190,7 @@ def test_setitem_raises_type():
 # ----------------------------------------------------------------------------
 # Ops
 
-def tet_sub_period():
+def test_sub_period():
     arr = period_array(['2000', '2001'], freq='D')
     other = pd.Period("2000", freq="M")
     with pytest.raises(IncompatibleFrequency, match="freq"):
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
new file mode 100644
index 0000000000000..3ff807daeeab9
--- /dev/null
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+
+class TestTimedeltaArray(object):
+    pass
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 18690a18f7cb3..67a3bd6f9b75e 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -458,6 +458,22 @@ def test_read_chunksize_generated_index(self):
 
         tm.assert_frame_equal(pd.concat(reader), df)
 
+    def test_read_chunksize_jagged_names(self):
+        # see gh-23509
+        data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])
+        reader = self.read_csv(StringIO(data), names=range(10), chunksize=4)
+
+        expected = DataFrame()
+
+        for i in range(10):
+            if i == 0:
+                expected[i] = [0] * 8
+            else:
+                expected[i] = [np.nan] * 7 + [0]
+
+        result = pd.concat(reader)
+        tm.assert_frame_equal(result, expected)
+
     def test_read_text_list(self):
         data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
         as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar',
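
`test_read_chunksize_jagged_names` exercises the tokenizer fix from gh-23509: early chunks with few words must not cause the word buffer to be under-allocated for later, wider chunks. A standalone reproduction of the scenario the test encodes, assuming a pandas build that includes this patch:

    from io import StringIO

    import numpy as np
    import pandas as pd

    # Seven 1-field rows followed by one 10-field row: before the fix, the
    # C parser could under-allocate its word buffer on a later chunk.
    data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])

    reader = pd.read_csv(StringIO(data), names=range(10), chunksize=4)
    result = pd.concat(reader)

    # Column 0 is fully populated; the rest are NaN except in the last row.
    assert result.shape == (8, 10)
    assert result[0].notna().all()
    assert np.isnan(result[9].iloc[0])
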
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
index 50d927176a7b4..21286e9b82323 100644
--- a/pandas/tests/io/parser/test_parsers.py
+++ b/pandas/tests/io/parser/test_parsers.py
@@ -6,9 +6,9 @@
 from pandas._libs.tslib import Timestamp
 from pandas.compat import StringIO
+from pandas.errors import AbstractMethodError
 
 from pandas import DataFrame, read_csv, read_table
-import pandas.core.common as com
 import pandas.util.testing as tm
 
 from .c_parser_only import CParserTests
@@ -46,7 +46,7 @@ def read_table(self, *args, **kwargs):
         raise NotImplementedError
 
     def float_precision_choices(self):
-        raise com.AbstractMethodError(self)
+        raise AbstractMethodError(self)
 
     @pytest.fixture(autouse=True)
     def setup_method(self, datapath):
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 49a3a3d58672d..9b147d53c06c4 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -105,23 +105,34 @@ def get_exceldf(self, basename, ext, *args, **kwds):
 class ReadingTestsBase(SharedItems):
     # This is based on ExcelWriterBase
 
-    @td.skip_if_no('xlrd', '1.0.1')  # GH-22682
+    @td.skip_if_no("xlrd", "1.0.1")  # see gh-22682
     def test_usecols_int(self, ext):
-        dfref = self.get_csv_refdf('test1')
-        dfref = dfref.reindex(columns=['A', 'B', 'C'])
-        df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3)
-        df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
-                               index_col=0, usecols=3)
+        df_ref = self.get_csv_refdf("test1")
+        df_ref = df_ref.reindex(columns=["A", "B", "C"])
 
-        with tm.assert_produces_warning(FutureWarning):
-            df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
+        # usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df1 = self.get_exceldf("test1", ext, "Sheet1",
+                                   index_col=0, usecols=3)
+
+        # usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
+                                   index_col=0, usecols=3)
+
+        # parse_cols instead of usecols, usecols as int
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1],
                                    index_col=0, parse_cols=3)
 
         # TODO add index to xls file)
-        tm.assert_frame_equal(df1, dfref, check_names=False)
-        tm.assert_frame_equal(df2, dfref, check_names=False)
-        tm.assert_frame_equal(df3, dfref, check_names=False)
+        tm.assert_frame_equal(df1, df_ref, check_names=False)
+        tm.assert_frame_equal(df2, df_ref, check_names=False)
+        tm.assert_frame_equal(df3, df_ref, check_names=False)
 
     @td.skip_if_no('xlrd', '1.0.1')  # GH-22682
     def test_usecols_list(self, ext):
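
The reworked `test_usecols_int` reflects the 0.24.0 deprecation of passing an integer for `usecols` in `read_excel`. The forward-compatible call spells out every column index explicitly; `"test1.xlsx"` below is a stand-in file name, not a file shipped with this patch:

    import pandas as pd

    # Deprecated in 0.24.0: an integer meant "parse columns 0 through 3".
    # df = pd.read_excel("test1.xlsx", usecols=3)  # emits FutureWarning

    # Preferred spelling: pass the list of column indices explicitly.
    df = pd.read_excel("test1.xlsx", usecols=[0, 1, 2, 3])
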
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 756385f0cfb56..7e0342e8b987a 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -24,12 +24,11 @@
                     notna, Timestamp, Timedelta)
 
 from pandas.compat import range, lrange, zip, OrderedDict
-from pandas.errors import UnsupportedFunctionCall
+from pandas.errors import AbstractMethodError, UnsupportedFunctionCall
 import pandas.tseries.offsets as offsets
 from pandas.tseries.offsets import Minute, BDay
 
 from pandas.core.groupby.groupby import DataError
-import pandas.core.common as com
 from pandas.core.indexes.datetimes import date_range
 from pandas.core.indexes.period import period_range, PeriodIndex, Period
@@ -599,7 +598,7 @@ def index(self, _index_start, _index_end, _index_freq):
 
     @pytest.fixture
    def _series_name(self):
-        raise com.AbstractMethodError(self)
+        raise AbstractMethodError(self)
 
     @pytest.fixture
     def _static_values(self, index):
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 25c419e485db1..067a7d4622ca2 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -9,7 +9,6 @@
 from pandas.core.dtypes.generic import ABCPeriod
 from pandas.core.tools.datetimes import to_datetime
-import pandas.core.common as com
 
 # import after tools, dateutil check
 from dateutil.easter import easter
@@ -29,6 +28,7 @@
     roll_yearday, shift_month,
     BaseOffset)
+from pandas.errors import AbstractMethodError
 
 __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay',
@@ -1097,7 +1097,7 @@ def apply(self, other):
 
     def _apply(self, n, other):
         """Handle specific apply logic for child classes"""
-        raise com.AbstractMethodError(self)
+        raise AbstractMethodError(self)
 
     @apply_index_wraps
     def apply_index(self, i):
@@ -1137,11 +1137,11 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month):
         The roll array is based on the fact that i gets rolled back to the
         first day of the month.
         """
-        raise com.AbstractMethodError(self)
+        raise AbstractMethodError(self)
 
     def _apply_index_days(self, i, roll):
         """Apply the correct day for each date in i"""
-        raise com.AbstractMethodError(self)
+        raise AbstractMethodError(self)
 
 
 class SemiMonthEnd(SemiMonthOffset):
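
The hunks above migrate call sites from the private `com.AbstractMethodError` to the public `pandas.errors.AbstractMethodError`. A minimal sketch of the pattern with a hypothetical base class (`BasePlotter` and its methods are illustrative, not pandas API):

    from pandas.errors import AbstractMethodError


    class BasePlotter(object):
        """Hypothetical base class; subclasses must implement _make_plot."""

        def _make_plot(self):
            # Raised with the instance, so the message names the subclass.
            raise AbstractMethodError(self)


    class LinePlotter(BasePlotter):
        def _make_plot(self):
            return "drawing a line plot"


    try:
        BasePlotter()._make_plot()
    except AbstractMethodError as err:
        print(err)  # "This method must be defined in the concrete class ..."
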
"See Also" should be located before ' + '"Notes"',)), ('BadSeeAlso', 'desc_no_period', ('Missing period at end of description for See Also "Series.iloc"',)), ('BadSeeAlso', 'desc_first_letter_lowercase', diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index ed84e58049cae..7da77a1f60ad5 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -56,6 +56,9 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] +ALLOWED_SECTIONS = ['Parameters', 'Attributes', 'Methods', 'Returns', 'Yields', + 'Other Parameters', 'Raises', 'Warns', 'See Also', 'Notes', + 'References', 'Examples'] ERROR_MSGS = { 'GL01': 'Docstring text (summary) should start in the line immediately ' 'after the opening quotes (not in the same line, or leaving a ' @@ -69,6 +72,10 @@ 'mentioned in public docstrings', 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' 'whitespace only', + 'GL06': 'Found unknown section "{section}". Allowed sections are: ' + '{allowed_sections}', + 'GL07': 'Wrong order of sections. "{wrong_section}" should be located ' + 'before "{goes_before}", the right order is: {sorted_sections}', 'SS01': 'No summary found (a short summary in a single line should be ' 'present at the beginning of the docstring)', 'SS02': 'Summary does not start with a capital letter', @@ -353,6 +360,18 @@ def double_blank_lines(self): prev = row.strip() return False + @property + def section_titles(self): + sections = [] + self.doc._doc.reset() + while not self.doc._doc.eof(): + content = self.doc._read_to_next_section() + if (len(content) > 1 + and len(content[0]) == len(content[1]) + and set(content[1]) == {'-'}): + sections.append(content[0]) + return sections + @property def summary(self): return ' '.join(self.doc['Summary']) @@ -580,6 +599,25 @@ def validate_one(func_name): if re.match("^ *\t", line): errs.append(error('GL05', line_with_tabs=line.lstrip())) + unseen_sections = list(ALLOWED_SECTIONS) + for section in doc.section_titles: + if section not in ALLOWED_SECTIONS: + errs.append(error('GL06', + section=section, + allowed_sections=', '.join(ALLOWED_SECTIONS))) + else: + if section in unseen_sections: + section_idx = unseen_sections.index(section) + unseen_sections = unseen_sections[section_idx + 1:] + else: + section_idx = ALLOWED_SECTIONS.index(section) + goes_before = ALLOWED_SECTIONS[section_idx + 1] + errs.append(error('GL07', + sorted_sections=' > '.join(ALLOWED_SECTIONS), + wrong_section=section, + goes_before=goes_before)) + break + if not doc.summary: errs.append(error('SS01')) else: diff --git a/setup.cfg b/setup.cfg index 2e07182196d5b..9f5384170a245 100644 --- a/setup.cfg +++ b/setup.cfg @@ -120,9 +120,6 @@ skip= pandas/core/indexes/numeric.py, pandas/core/indexes/interval.py, pandas/core/indexes/multi.py, - pandas/core/indexes/timedeltas.py, - pandas/core/indexes/datetimelike.py, - pandas/core/indexes/datetimes.py, pandas/core/indexes/base.py, pandas/core/indexes/accessors.py, pandas/core/indexes/period.py,