Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into just-change-value-c…
Browse files Browse the repository at this point in the history
…ounts
  • Loading branch information
MarcoGorelli committed Dec 30, 2022
2 parents ec48816 + acd2f5f commit 016ddbb
Show file tree
Hide file tree
Showing 100 changed files with 1,704 additions and 941 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/docbuild-and-upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:
env:
ENV_FILE: environment.yml
PANDAS_CI: 1
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

permissions:
contents: read
Expand Down Expand Up @@ -45,6 +46,12 @@ jobs:
- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Set up maintainers cache
uses: actions/cache@v3
with:
path: maintainers.json
key: maintainers

- name: Build website
run: python web/pandas_web.py web/pandas --target-path=web/build

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/macos-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env:
PANDAS_CI: 1
PYTEST_TARGET: pandas
PATTERN: "not slow and not db and not network and not single_cpu"
TEST_ARGS: "-W error:::pandas"
ERROR_ON_WARNINGS: "1"


permissions:
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
uses: ./.github/actions/setup-conda
with:
environment-file: ci/deps/${{ matrix.env_file }}
pyarrow-version: ${{ matrix.os == 'macos-latest' && '6' || '' }}
pyarrow-version: ${{ matrix.os == 'macos-latest' && '9' || '' }}

- name: Build Pandas
uses: ./.github/actions/build_pandas
Expand Down
27 changes: 17 additions & 10 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: "Minimum Versions"
env_file: actions-38-minimum_versions.yaml
pattern: "not slow and not network and not single_cpu"
test_args: ""
error_on_warnings: "0"
- name: "Locale: it_IT"
env_file: actions-38.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -63,40 +63,47 @@ jobs:
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
test_args: ""
error_on_warnings: "0"
- name: "Data Manager"
env_file: actions-38.yaml
pattern: "not slow and not network and not single_cpu"
pandas_data_manager: "array"
test_args: ""
error_on_warnings: "0"
- name: "Pypy"
env_file: actions-pypy-38.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "--max-worker-restart 0"
error_on_warnings: "0"
- name: "Numpy Dev"
env_file: actions-310-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
error_on_warnings: "0"
exclude:
- env_file: actions-39.yaml
pyarrow_version: "6"
- env_file: actions-39.yaml
- env_file: actions-38.yaml
pyarrow_version: "7"
- env_file: actions-310.yaml
pyarrow_version: "6"
- env_file: actions-310.yaml
- env_file: actions-38.yaml
pyarrow_version: "8"
- env_file: actions-38.yaml
pyarrow_version: "9"
- env_file: actions-39.yaml
pyarrow_version: "7"
- env_file: actions-39.yaml
pyarrow_version: "8"
- env_file: actions-39.yaml
pyarrow_version: "9"
fail-fast: false
name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
env:
ENV_FILE: ci/deps/${{ matrix.env_file }}
PATTERN: ${{ matrix.pattern }}
EXTRA_APT: ${{ matrix.extra_apt || '' }}
ERROR_ON_WARNINGS: ${{ matrix.error_on_warnings || '1' }}
LANG: ${{ matrix.lang || '' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
TEST_ARGS: ${{ matrix.test_args || '-W error:::pandas' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
Expand Down
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,4 +342,3 @@ repos:
exclude: |
(?x)
^pandas/tests/generic/test_generic.py # GH50380
|^pandas/tests/io/json/test_readlines.py # GH50378
7 changes: 7 additions & 0 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ if [[ "$PATTERN" ]]; then
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
fi

if [[ "$ERROR_ON_WARNINGS" == "1" ]]; then
for pth in $(find pandas -name '*.py' -not -path "pandas/tests/*" | sed -e 's/\.py//g' -e 's/\/__init__//g' -e 's/\//./g');
do
PYTEST_CMD="$PYTEST_CMD -W error:::$pth"
done
fi

echo $PYTEST_CMD
sh -c "$PYTEST_CMD"

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ To completely override the default values that are recognized as missing, specif
.. _io.navaluesconst:

The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A',
'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', 'None', '']``.

Let us consider some examples:

Expand Down
26 changes: 23 additions & 3 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

* :func:`read_csv`
* :func:`read_fwf`
* :func:`read_excel`
* :func:`read_html`
* :func:`read_sql`
* :func:`read_sql_query`
* :func:`read_sql_table`
Expand All @@ -46,6 +48,7 @@ to select the nullable dtypes implementation.

* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
* :func:`read_excel`
* :func:`read_html`
* :func:`read_parquet`
* :func:`read_orc`

Expand Down Expand Up @@ -98,6 +101,8 @@ Other enhancements
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :meth:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -501,15 +506,16 @@ Other API changes
- Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`)
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`)
- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes; the results no longer do type inference on the result of the array operations. Use ``result.infer_objects(copy=False)`` to do type inference on the result (:issue:`49999`, :issue:`49714`)
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
- Added ``"None"`` to default ``na_values`` in :func:`read_csv` (:issue:`50286`)
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data that is not round numbers, this now raises ``ValueError`` instead of silently retaining the float dtype; do ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`)
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
Expand All @@ -520,6 +526,7 @@ Other API changes
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -550,6 +557,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
- Enforced deprecation changing behavior when applying a numpy ufunc on multiple non-aligned (on the index or columns) :class:`DataFrame` that will now align the inputs first (:issue:`39239`)
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
Expand Down Expand Up @@ -793,7 +801,7 @@ Performance improvements
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
- Performance improvement in ``var`` and ``std`` for nullable dtypes (:issue:`48379`).
- Performance improvement when iterating over pyarrow and nullable dtypes (:issue:`49825`, :issue:`49851`)
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
- Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
Expand All @@ -803,6 +811,7 @@ Performance improvements
- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
- Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.bug_fixes:
Expand Down Expand Up @@ -832,6 +841,13 @@ Datetimelike
- Bug in :meth:`Timestamp.round` when the ``freq`` argument has zero-duration (e.g. "0ns") returning incorrect results instead of raising (:issue:`49737`)
- Bug in :func:`to_datetime` was not raising ``ValueError`` when invalid format was passed and ``errors`` was ``'ignore'`` or ``'coerce'`` (:issue:`50266`)
- Bug in :class:`DateOffset` was throwing ``TypeError`` when constructing with milliseconds and another super-daily argument (:issue:`49897`)
- Bug in :func:`to_datetime` was not raising ``ValueError`` when parsing a string with a decimal date with format ``'%Y%m%d'`` (:issue:`50051`)
- Bug in :func:`to_datetime` was not converting ``None`` to ``NaT`` when parsing mixed-offset date strings with ISO8601 format (:issue:`50071`)
- Bug in :func:`to_datetime` was not returning input when parsing out-of-bounds date string with ``errors='ignore'`` and ``format='%Y%m%d'`` (:issue:`14487`)
- Bug in :func:`to_datetime` was converting timezone-naive ``datetime.datetime`` to timezone-aware when parsing with timezone-aware strings, ISO8601 format, and ``utc=False`` (:issue:`50254`)
- Bug in :func:`to_datetime` was throwing ``ValueError`` when parsing dates with ISO8601 format where some values were not zero-padded (:issue:`21422`)
- Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`)
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
-

Timedelta
Expand Down Expand Up @@ -956,6 +972,9 @@ Groupby/resample/rolling
- Bug in :meth:`.SeriesGroupBy.describe` with ``as_index=False`` would have the incorrect shape (:issue:`49256`)
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` with ``dropna=False`` would drop NA values when the grouper was categorical (:issue:`36327`)
- Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`)
- Bug in :meth:`.SeriesGroupBy.nth` would raise when grouper contained NA values after subsetting from a :class:`DataFrameGroupBy` (:issue:`26454`)
- Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`)
-

Reshaping
^^^^^^^^^
Expand All @@ -980,6 +999,7 @@ ExtensionArray
^^^^^^^^^^^^^^
- Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`)
- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`)
- Bug in :meth:`Series.round` for pyarrow-backed dtypes raising ``AttributeError`` (:issue:`50437`)
- Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`)
- Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`)

Expand Down
4 changes: 2 additions & 2 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
for _dependency in _hard_dependencies:
try:
__import__(_dependency)
except ImportError as _e:
except ImportError as _e: # pragma: no cover
_missing_dependencies.append(f"{_dependency}: {_e}")

if _missing_dependencies:
if _missing_dependencies: # pragma: no cover
raise ImportError(
"Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
)
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/intervaltree.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,8 @@ cdef class IntervalTree(IntervalMixin):
"""
if self._na_count > 0:
return False
values = [self.right, self.left]

sort_order = np.lexsort(values)
sort_order = self.left_sorter
return is_monotonic(sort_order, False)[0]

def get_indexer(self, scalar_t[:] target) -> np.ndarray:
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,7 @@ STR_NA_VALUES = {
"nan",
"-nan",
"",
"None",
}
_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES))

Expand Down
5 changes: 1 addition & 4 deletions pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def format_array_from_datetime(
values: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
format: str | None = ...,
na_rep: object = ...,
na_rep: str | float = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.object_]: ...
def array_with_unit_to_datetime(
Expand All @@ -23,9 +23,6 @@ def array_to_datetime(
dayfirst: bool = ...,
yearfirst: bool = ...,
utc: bool = ...,
require_iso8601: bool = ...,
format: str | None = ...,
exact: bool = ...,
) -> tuple[np.ndarray, tzinfo | None]: ...

# returned ndarray may be object dtype or datetime64[ns]
Expand Down
Loading

0 comments on commit 016ddbb

Please sign in to comment.