diff --git a/.binstar.yml b/.binstar.yml
deleted file mode 100644
index 7b507b4f90049..0000000000000
--- a/.binstar.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-package: pandas
-user: jreback
-
-install:
- - conda config --add channels pandas
-
-before_script:
- - python -V
-
-platform:
- - linux-64
- #- linux-32
- - osx-64
- #- win-32
- - win-64
-engine:
- - python=2.7
- - python=3.4
-script:
- - conda build conda.recipe --quiet
-
-iotimeout: 600
-
-build_targets: conda
-
-notifications:
- email:
- recipients: ['jeff@reback.net']
diff --git a/.devcontainer.json b/.devcontainer.json
new file mode 100644
index 0000000000000..315a1ff647012
--- /dev/null
+++ b/.devcontainer.json
@@ -0,0 +1,28 @@
+// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
+// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda
+{
+ "name": "pandas",
+ "context": ".",
+ "dockerFile": "Dockerfile",
+
+ // Use 'settings' to set *default* container specific settings.json values on container create.
+ // You can edit these settings after create using File > Preferences > Settings > Remote.
+ "settings": {
+ "terminal.integrated.shell.linux": "/bin/bash",
+ "python.condaPath": "/opt/conda/bin/conda",
+ "python.pythonPath": "/opt/conda/bin/python",
+ "python.formatting.provider": "black",
+ "python.linting.enabled": true,
+ "python.linting.flake8Enabled": true,
+ "python.linting.pylintEnabled": false,
+ "python.linting.mypyEnabled": true,
+ "python.testing.pytestEnabled": true,
+ "python.testing.cwd": "pandas/tests"
+ },
+
+ // Add the IDs of extensions you want installed when the container is created in the array below.
+ "extensions": [
+ "ms-python.python",
+ "ms-vscode.cpptools"
+ ]
+}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a36420556ae24..d87fa5203bd52 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,53 +23,53 @@ jobs:
- name: Looking for unwanted patterns
run: ci/code_checks.sh patterns
- if: true
+ if: always()
- name: Setup environment and build pandas
run: ci/setup_env.sh
- if: true
+ if: always()
- name: Linting
run: |
source activate pandas-dev
ci/code_checks.sh lint
- if: true
+ if: always()
- name: Dependencies consistency
run: |
source activate pandas-dev
ci/code_checks.sh dependencies
- if: true
+ if: always()
- name: Checks on imported code
run: |
source activate pandas-dev
ci/code_checks.sh code
- if: true
+ if: always()
- name: Running doctests
run: |
source activate pandas-dev
ci/code_checks.sh doctests
- if: true
+ if: always()
- name: Docstring validation
run: |
source activate pandas-dev
ci/code_checks.sh docstrings
- if: true
+ if: always()
- name: Typing validation
run: |
source activate pandas-dev
ci/code_checks.sh typing
- if: true
+ if: always()
- name: Testing docstring validation script
run: |
source activate pandas-dev
pytest --capture=no --strict scripts
- if: true
+ if: always()
- name: Running benchmarks
run: |
@@ -87,7 +87,7 @@ jobs:
else
echo "Benchmarks did not run, no changes detected"
fi
- if: true
+ if: always()
- name: Publish benchmarks artifact
uses: actions/upload-artifact@master
@@ -95,3 +95,65 @@ jobs:
name: Benchmarks log
path: asv_bench/benchmarks.log
if: failure()
+
+ web_and_docs:
+ name: Web and docs
+ runs-on: ubuntu-latest
+ steps:
+
+ - name: Setting conda path
+ run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
+
+ - name: Checkout
+ uses: actions/checkout@v1
+
+ - name: Setup environment and build pandas
+ run: ci/setup_env.sh
+
+ - name: Build website
+ run: |
+ source activate pandas-dev
+ python web/pandas_web.py web/pandas --target-path=web/build
+
+ - name: Build documentation
+ run: |
+ source activate pandas-dev
+ doc/make.py --warnings-are-errors | tee sphinx.log ; exit ${PIPESTATUS[0]}
+
+ # This can be removed when the ipython directive fails when there are errors,
+ # including the `tee sphinx.log` in the previous step (https://github.com/ipython/ipython/issues/11547)
+ - name: Check ipython directive errors
+ run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
+
+ - name: Merge website and docs
+ run: |
+ mkdir -p pandas_web/docs
+ cp -r web/build/* pandas_web/
+ cp -r doc/build/html/* pandas_web/docs/
+ if: github.event_name == 'push'
+
+ - name: Install Rclone
+ run: sudo apt install rclone -y
+ if: github.event_name == 'push'
+
+ - name: Set up Rclone
+ run: |
+ RCLONE_CONFIG_PATH=$HOME/.config/rclone/rclone.conf
+ mkdir -p `dirname $RCLONE_CONFIG_PATH`
+ echo "[ovh_cloud_pandas_web]" > $RCLONE_CONFIG_PATH
+ echo "type = swift" >> $RCLONE_CONFIG_PATH
+ echo "env_auth = false" >> $RCLONE_CONFIG_PATH
+ echo "auth_version = 3" >> $RCLONE_CONFIG_PATH
+ echo "auth = https://auth.cloud.ovh.net/v3/" >> $RCLONE_CONFIG_PATH
+ echo "endpoint_type = public" >> $RCLONE_CONFIG_PATH
+ echo "tenant_domain = default" >> $RCLONE_CONFIG_PATH
+ echo "tenant = 2977553886518025" >> $RCLONE_CONFIG_PATH
+ echo "domain = default" >> $RCLONE_CONFIG_PATH
+ echo "user = w4KGs3pmDxpd" >> $RCLONE_CONFIG_PATH
+ echo "key = ${{ secrets.ovh_object_store_key }}" >> $RCLONE_CONFIG_PATH
+ echo "region = BHS" >> $RCLONE_CONFIG_PATH
+ if: github.event_name == 'push'
+
+ - name: Sync web
+ run: rclone sync pandas_web ovh_cloud_pandas_web:dev
+ if: github.event_name == 'push'
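
For illustration, the remote configured above can be sanity-checked locally before the real sync runs; a minimal sketch, assuming rclone is installed and the same rclone.conf is in place (the remote name comes from the config written above):

    # list top-level containers on the remote
    rclone lsd ovh_cloud_pandas_web:
    # preview what the CI sync would transfer, without writing anything
    rclone sync --dry-run pandas_web ovh_cloud_pandas_web:dev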
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b34f5dfdd1a83..139b9e31df46c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,8 +11,20 @@ repos:
language: python_venv
additional_dependencies: [flake8-comprehensions>=3.1.0]
- repo: https://github.com/pre-commit/mirrors-isort
- rev: v4.3.20
+ rev: v4.3.21
hooks:
- id: isort
language: python_venv
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v0.730
+ hooks:
+ - id: mypy
+ args:
+ # As long as some files are excluded from check-untyped-defs
+ # we have to exclude it from the pre-commit hook as the configuration
+ # is based on modules but the hook runs on files.
+ - --no-check-untyped-defs
+ - --follow-imports
+ - skip
+ files: pandas/
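
For illustration, these hooks can be exercised locally; a minimal sketch, assuming pre-commit is installed (the hook ids come from the configuration above):

    # run only the new mypy hook against the whole tree
    pre-commit run mypy --all-files
    # or run every configured hook (black, flake8, isort, mypy)
    pre-commit run --all-files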
diff --git a/.travis.yml b/.travis.yml
index 0c7740295b637..2c8533d02ddc1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
language: python
-python: 3.5
+python: 3.7
# To turn off cached cython files and compiler cache
# set NOCACHE-true
@@ -7,10 +7,10 @@ python: 3.5
# travis cache --delete inside the project directory from the travis command line client
# The cache directories will be deleted if anything in ci/ changes in a commit
cache:
- ccache: true
- directories:
- - $HOME/.cache # cython cache
- - $HOME/.ccache # compiler cache
+ ccache: true
+ directories:
+ - $HOME/.cache # cython cache
+ - $HOME/.ccache # compiler cache
env:
global:
@@ -20,45 +20,40 @@ env:
- secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
git:
- # for cloning
- depth: false
+ # for cloning
+ depth: false
matrix:
- fast_finish: true
- exclude:
- # Exclude the default Python 3.5 build
- - python: 3.5
+ fast_finish: true
- include:
+ include:
- env:
- - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"
+ - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
- env:
- - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
+ - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
- env:
- - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
+ - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
services:
- mysql
- postgresql
- env:
- - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
+ # Enabling Deprecations when running tests
+ # PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
+ # See pandas/_testing.py for more details.
+ - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
services:
- mysql
- postgresql
- # In allow_failures
- env:
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
services:
- mysql
- postgresql
- allow_failures:
- - env:
- - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
-
before_install:
- echo "before_install"
# set non-blocking IO on travis
@@ -78,7 +73,6 @@ before_install:
# This overrides travis and tells it to look nowhere.
- export BOTO_CONFIG=/dev/null
-
install:
- echo "install start"
- ci/prep_cython_cache.sh
@@ -95,5 +89,5 @@ script:
after_script:
- echo "after_script start"
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- - ci/print_skipped.py
+ - ci/print_skipped.py
- echo "after_script done"
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000..b8aff5d671dcf
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,47 @@
+FROM continuumio/miniconda3
+
+# if you forked pandas, you can pass in your own GitHub username to use your fork
+# i.e. gh_username=myname
+ARG gh_username=pandas-dev
+ARG pandas_home="/home/pandas"
+
+# Avoid warnings by switching to noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Configure apt and install packages
+RUN apt-get update \
+ && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
+ #
+ # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
+ && apt-get -y install git iproute2 procps lsb-release \
+ #
+ # Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
+ # needed to build pandas C extensions
+ && apt-get -y install build-essential \
+ #
+ # cleanup
+ && apt-get autoremove -y \
+ && apt-get clean -y \
+ && rm -rf /var/lib/apt/lists/*
+
+# Switch back to dialog for any ad-hoc use of apt-get
+ENV DEBIAN_FRONTEND=dialog
+
+# Clone pandas repo
+RUN mkdir "$pandas_home" \
+ && git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \
+ && cd "$pandas_home" \
+ && git remote add upstream "https://github.com/pandas-dev/pandas.git" \
+ && git pull upstream master
+
+# Because it is surprisingly difficult to activate a conda environment inside a DockerFile
+# (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89),
+# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
+#
+# Set up environment
+RUN conda env update -n base -f "$pandas_home/environment.yml"
+
+# Build C extensions and pandas
+RUN cd "$pandas_home" \
+ && python setup.py build_ext --inplace -j 4 \
+ && python -m pip install -e .
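
For illustration, a minimal sketch of building and entering this image, assuming Docker is installed (the gh_username build arg is the one declared in the Dockerfile; the image tag is illustrative):

    # build against your own fork, as the comment at the top suggests
    docker build --build-arg gh_username=myname -t pandas-dev .
    # start an interactive shell in the development environment
    docker run -it pandas-dev /bin/bash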
diff --git a/LICENSE b/LICENSE
index 924de26253bf4..76954a5a339ab 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,8 +1,10 @@
BSD 3-Clause License
-Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.
+Copyright (c) 2011-2020, Open source contributors.
+
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
diff --git a/RELEASE.md b/RELEASE.md
index efd075dabcba9..7924ffaff561f 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -2,5 +2,5 @@ Release Notes
=============
The list of changes to Pandas between each release can be found
-[here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full
+[here](https://pandas.pydata.org/pandas-docs/stable/whatsnew/index.html). For full
details, see the commit logs at http://github.com/pandas-dev/pandas.
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index c04bbf53a86a6..7886b63e9983e 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -7,7 +7,7 @@
"project": "pandas",
// The project's homepage
- "project_url": "http://pandas.pydata.org/",
+ "project_url": "https://pandas.pydata.org/",
// The URL of the source code repository for the project being
// benchmarked
@@ -43,6 +43,7 @@
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
+ "numba": [],
"numexpr": [],
"pytables": [null, ""], // platform dependent, see excludes below
"tables": [null, ""],
@@ -122,5 +123,8 @@
".*": "0409521665"
},
"regression_thresholds": {
- }
+ },
+ "build_command":
+ ["python setup.py build -j4",
+ "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
}
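
For illustration, a minimal sketch of how asv picks up this build_command, assuming airspeed velocity is installed and the command is run from asv_bench/ (the benchmark selection is illustrative):

    # compare benchmark results between upstream/master and the current commit
    asv continuous -f 1.1 upstream/master HEAD -b groupby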
diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 7d97f2c740acb..0f3b3838de1b2 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -5,7 +5,8 @@
from pandas._libs import lib
import pandas as pd
-from pandas.util import testing as tm
+
+from .pandas_vb_common import tm
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py
index 58e0db67d6025..64e067d25a454 100644
--- a/asv_bench/benchmarks/binary_ops.py
+++ b/asv_bench/benchmarks/binary_ops.py
@@ -1,3 +1,5 @@
+import operator
+
import numpy as np
from pandas import DataFrame, Series, date_range
@@ -9,6 +11,36 @@
import pandas.computation.expressions as expr
+class IntFrameWithScalar:
+ params = [
+ [np.float64, np.int64],
+ [2, 3.0, np.int32(4), np.float64(5)],
+ [
+ operator.add,
+ operator.sub,
+ operator.mul,
+ operator.truediv,
+ operator.floordiv,
+ operator.pow,
+ operator.mod,
+ operator.eq,
+ operator.ne,
+ operator.gt,
+ operator.ge,
+ operator.lt,
+ operator.le,
+ ],
+ ]
+ param_names = ["dtype", "scalar", "op"]
+
+ def setup(self, dtype, scalar, op):
+ arr = np.random.randn(20000, 100)
+ self.df = DataFrame(arr.astype(dtype))
+
+ def time_frame_op_with_scalar(self, dtype, scalar, op):
+ op(self.df, scalar)
+
+
class Ops:
params = [[True, False], ["default", 1]]
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 43b1b31a0bfe8..1dcd52ac074a6 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -3,7 +3,8 @@
import numpy as np
import pandas as pd
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
try:
from pandas.api.types import union_categoricals
diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py
index a9e45cad22d27..7c43485f5ef45 100644
--- a/asv_bench/benchmarks/ctors.py
+++ b/asv_bench/benchmarks/ctors.py
@@ -1,7 +1,8 @@
import numpy as np
from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
def no_change(arr):
diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
index 1deca8fe3aad0..2b24bab85bc57 100644
--- a/asv_bench/benchmarks/frame_ctor.py
+++ b/asv_bench/benchmarks/frame_ctor.py
@@ -1,7 +1,8 @@
import numpy as np
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
try:
from pandas.tseries.offsets import Nano, Hour
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index ae6c07107f4a0..2187668c96ca4 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -4,7 +4,8 @@
import numpy as np
from pandas import DataFrame, MultiIndex, NaT, Series, date_range, isnull, period_range
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class GetNumericData:
diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
index 860c6cc6192bb..e266d871f5bc6 100644
--- a/asv_bench/benchmarks/gil.py
+++ b/asv_bench/benchmarks/gil.py
@@ -2,7 +2,8 @@
from pandas import DataFrame, Series, date_range, factorize, read_csv
from pandas.core.algorithms import take_1d
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
try:
from pandas import (
@@ -24,7 +25,7 @@
except ImportError:
from pandas import algos
try:
- from pandas.util.testing import test_parallel
+ from pandas._testing import test_parallel
have_real_test_parallel = True
except ImportError:
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index d51c53e2264f1..28e0dcc5d9b13 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -13,7 +13,8 @@
date_range,
period_range,
)
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
method_blacklist = {
"object": {
diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py
index d69799eb70040..103141545504b 100644
--- a/asv_bench/benchmarks/index_object.py
+++ b/asv_bench/benchmarks/index_object.py
@@ -12,7 +12,8 @@
Series,
date_range,
)
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class SetOperations:
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index c78c2fa92827e..087fe3916845b 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -17,7 +17,8 @@
option_context,
period_range,
)
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class NumericSeriesIndexing:
@@ -131,6 +132,7 @@ def setup(self):
self.col_scalar = columns[10]
self.bool_indexer = self.df[self.col_scalar] > 0
self.bool_obj_indexer = self.bool_indexer.astype(object)
+ self.boolean_indexer = (self.df[self.col_scalar] > 0).astype("boolean")
def time_loc(self):
self.df.loc[self.idx_scalar, self.col_scalar]
@@ -144,6 +146,9 @@ def time_boolean_rows(self):
def time_boolean_rows_object(self):
self.df[self.bool_obj_indexer]
+ def time_boolean_rows_boolean(self):
+ self.df[self.boolean_indexer]
+
class DataFrameNumericIndexing:
def setup(self):
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index e85b3bd2c7687..1a8d5ede52512 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, Series, to_numeric
-import pandas.util.testing as tm
-from .pandas_vb_common import lib, numeric_dtypes
+from .pandas_vb_common import lib, numeric_dtypes, tm
class NumericInferOps:
diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index b8e8630e663ee..9bcd125f56bbb 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -5,9 +5,8 @@
import numpy as np
from pandas import Categorical, DataFrame, date_range, read_csv, to_datetime
-import pandas.util.testing as tm
-from ..pandas_vb_common import BaseIO
+from ..pandas_vb_common import BaseIO, tm
class ToCSV(BaseIO):
diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index 75d87140488e3..80af2cff41769 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -6,7 +6,8 @@
from odf.text import P
from pandas import DataFrame, ExcelWriter, date_range, read_excel
-import pandas.util.testing as tm
+
+from ..pandas_vb_common import tm
def _generate_dataframe():
diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py
index 88c1a3dc48ea4..4ca399a293a4b 100644
--- a/asv_bench/benchmarks/io/hdf.py
+++ b/asv_bench/benchmarks/io/hdf.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, HDFStore, date_range, read_hdf
-import pandas.util.testing as tm
-from ..pandas_vb_common import BaseIO
+from ..pandas_vb_common import BaseIO, tm
class HDFStoreDataFrame(BaseIO):
diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
index 8f037e94e0095..f478bf2aee0ba 100644
--- a/asv_bench/benchmarks/io/json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, concat, date_range, read_json, timedelta_range
-import pandas.util.testing as tm
-from ..pandas_vb_common import BaseIO
+from ..pandas_vb_common import BaseIO, tm
class ReadJSON(BaseIO):
@@ -132,6 +131,30 @@ def peakmem_to_json_wide(self, orient, frame):
df.to_json(self.fname, orient=orient)
+class ToJSONISO(BaseIO):
+ fname = "__test__.json"
+ params = [["split", "columns", "index", "values", "records"]]
+ param_names = ["orient"]
+
+ def setup(self, orient):
+ N = 10 ** 5
+ index = date_range("20000101", periods=N, freq="H")
+ timedeltas = timedelta_range(start=1, periods=N, freq="s")
+ datetimes = date_range(start=1, periods=N, freq="s")
+ self.df = DataFrame(
+ {
+ "td_1": timedeltas,
+ "td_2": timedeltas,
+ "ts_1": datetimes,
+ "ts_2": datetimes,
+ },
+ index=index,
+ )
+
+ def time_iso_format(self, orient):
+ self.df.to_json(orient=orient, date_format="iso")
+
+
class ToJSONLines(BaseIO):
fname = "__test__.json"
diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py
index 12620656dd2bf..4ca9a82ae4827 100644
--- a/asv_bench/benchmarks/io/pickle.py
+++ b/asv_bench/benchmarks/io/pickle.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, date_range, read_pickle
-import pandas.util.testing as tm
-from ..pandas_vb_common import BaseIO
+from ..pandas_vb_common import BaseIO, tm
class Pickle(BaseIO):
diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py
index 6cc7f56ae3d65..b71bb832280b9 100644
--- a/asv_bench/benchmarks/io/sql.py
+++ b/asv_bench/benchmarks/io/sql.py
@@ -4,7 +4,8 @@
from sqlalchemy import create_engine
from pandas import DataFrame, date_range, read_sql_query, read_sql_table
-import pandas.util.testing as tm
+
+from ..pandas_vb_common import tm
class SQL:
diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py
index f3125f8598418..9faafa82ff46e 100644
--- a/asv_bench/benchmarks/io/stata.py
+++ b/asv_bench/benchmarks/io/stata.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, date_range, read_stata
-import pandas.util.testing as tm
-from ..pandas_vb_common import BaseIO
+from ..pandas_vb_common import BaseIO, tm
class Stata(BaseIO):
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 5cf9f6336ba0c..1333b3a0f0560 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -3,7 +3,8 @@
import numpy as np
from pandas import DataFrame, MultiIndex, Series, concat, date_range, merge, merge_asof
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
try:
from pandas import merge_ordered
diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py
index 5a396c9f0deff..0e188c58012fa 100644
--- a/asv_bench/benchmarks/multiindex_object.py
+++ b/asv_bench/benchmarks/multiindex_object.py
@@ -3,7 +3,8 @@
import numpy as np
from pandas import DataFrame, MultiIndex, RangeIndex, date_range
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class GetLoc:
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index 1faf13329110d..6da2b2270c04a 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -13,6 +13,13 @@
except (ImportError, TypeError, ValueError):
pass
+# Compatibility import for the testing module
+try:
+ import pandas._testing as tm # noqa
+except ImportError:
+ import pandas.util.testing as tm # noqa
+
+
numeric_dtypes = [
np.int64,
np.int32,
diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py
index cd450f801c805..03394e6fe08cb 100644
--- a/asv_bench/benchmarks/reindex.py
+++ b/asv_bench/benchmarks/reindex.py
@@ -1,9 +1,8 @@
import numpy as np
from pandas import DataFrame, Index, MultiIndex, Series, date_range, period_range
-import pandas.util.testing as tm
-from .pandas_vb_common import lib
+from .pandas_vb_common import lib, tm
class Reindex:
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
index 7a72622fd5fe3..f7e1e395a76bc 100644
--- a/asv_bench/benchmarks/rolling.py
+++ b/asv_bench/benchmarks/rolling.py
@@ -44,6 +44,27 @@ def time_rolling(self, constructor, window, dtype, function, raw):
self.roll.apply(function, raw=raw)
+class Engine:
+ params = (
+ ["DataFrame", "Series"],
+ ["int", "float"],
+ [np.sum, lambda x: np.sum(x) + 5],
+ ["cython", "numba"],
+ )
+ param_names = ["constructor", "dtype", "function", "engine"]
+
+ def setup(self, constructor, dtype, function, engine):
+ N = 10 ** 3
+ arr = (100 * np.random.random(N)).astype(dtype)
+ self.data = getattr(pd, constructor)(arr)
+
+ def time_rolling_apply(self, constructor, dtype, function, engine):
+ self.data.rolling(10).apply(function, raw=True, engine=engine)
+
+ def time_expanding_apply(self, constructor, dtype, function, engine):
+ self.data.expanding().apply(function, raw=True, engine=engine)
+
+
class ExpandingMethods:
params = (
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index a3f1d92545c3f..57c625ced8a43 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -3,7 +3,8 @@
import numpy as np
from pandas import NaT, Series, date_range
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class SeriesConstructor:
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index f30b2482615bd..d7fb2775376c0 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -3,7 +3,8 @@
import numpy as np
from pandas import DataFrame, Series
-import pandas.util.testing as tm
+
+from .pandas_vb_common import tm
class Methods:
diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py
index d6379b922641c..fc1efe63307b2 100644
--- a/asv_bench/benchmarks/tslibs/offsets.py
+++ b/asv_bench/benchmarks/tslibs/offsets.py
@@ -59,7 +59,7 @@ def setup(self, offset):
def time_on_offset(self, offset):
for date in self.dates:
- offset.onOffset(date)
+ offset.is_on_offset(date)
class OffestDatetimeArithmetic:
diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml
index cb0b17e3553a4..c9a2e4eefd19d 100644
--- a/ci/azure/posix.yml
+++ b/ci/azure/posix.yml
@@ -18,31 +18,39 @@ jobs:
py36_minimum_versions:
ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
CONDA_PY: "36"
- PATTERN: "not slow and not network"
+ PATTERN: "not slow and not network and not clipboard"
+
py36_locale_slow_old_np:
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
CONDA_PY: "36"
PATTERN: "slow"
- LOCALE_OVERRIDE: "zh_CN.UTF-8"
+ # pandas does not use the language (zh_CN), but should support different encodings (utf8)
+ # we should test with encodings different than utf8, but Ubuntu doesn't seem to support any
+ LANG: "zh_CN.utf8"
+ LC_ALL: "zh_CN.utf8"
EXTRA_APT: "language-pack-zh-hans"
py36_locale:
ENV_FILE: ci/deps/azure-36-locale.yaml
CONDA_PY: "36"
PATTERN: "not slow and not network"
- LOCALE_OVERRIDE: "it_IT.UTF-8"
+ LANG: "it_IT.utf8"
+ LC_ALL: "it_IT.utf8"
+ EXTRA_APT: "language-pack-it xsel"
py36_32bit:
ENV_FILE: ci/deps/azure-36-32bit.yaml
CONDA_PY: "36"
- PATTERN: "not slow and not network"
+ PATTERN: "not slow and not network and not clipboard"
BITS32: "yes"
py37_locale:
ENV_FILE: ci/deps/azure-37-locale.yaml
CONDA_PY: "37"
PATTERN: "not slow and not network"
- LOCALE_OVERRIDE: "zh_CN.UTF-8"
+ LANG: "zh_CN.utf8"
+ LC_ALL: "zh_CN.utf8"
+ EXTRA_APT: "language-pack-zh-hans xsel"
py37_np_dev:
ENV_FILE: ci/deps/azure-37-numpydev.yaml
@@ -54,10 +62,16 @@ jobs:
steps:
- script: |
- if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
- echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
- echo "Creating Environment"
- ci/setup_env.sh
+ if [ "$(uname)" == "Linux" ]; then
+ sudo apt-get update
+ sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
+ fi
+ displayName: 'Install extra packages'
+
+ - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
+ displayName: 'Set conda path'
+
+ - script: ci/setup_env.sh
displayName: 'Setup environment and build pandas'
- script: |
diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml
index 03529bd6569c6..187a5db99802f 100644
--- a/ci/azure/windows.yml
+++ b/ci/azure/windows.yml
@@ -34,7 +34,7 @@ jobs:
- bash: |
source activate pandas-dev
conda list
- python setup.py build_ext -q -i
+ python setup.py build_ext -q -i -j 4
python -m pip install --no-build-isolation -e .
displayName: 'Build'
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 94eaab0a5b4da..0cc42be42d61e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
RET=$(($RET + $?)) ; echo $MSG "DONE"
+ MSG='Check for use of not concatenated strings' ; echo $MSG
+ if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+ $BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
+ else
+ $BASE_DIR/scripts/validate_string_concatenation.py .
+ fi
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
+
echo "isort --version-number"
isort --version-number
@@ -122,13 +130,18 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
# Check for imports from collections.abc instead of `from collections import abc`
MSG='Check for non-standard imports' ; echo $MSG
invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
invgrep -R --include="*.py*" -E "from collections.abc import" pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
invgrep -R --include="*.py*" -E "from numpy import nan" pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
# Checks for test suite
- # Check for imports from pandas.util.testing instead of `import pandas.util.testing as tm`
- invgrep -R --include="*.py*" -E "from pandas.util.testing import" pandas/tests
+ # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+ invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -195,6 +208,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
RET=$(($RET + $?)) ; echo $MSG "DONE"
+ MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
+ invgrep -R --include=*.{py,pyx} '!r}' pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
+
MSG='Check for use of comment-based annotation syntax' ; echo $MSG
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -281,8 +298,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
-k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range"
RET=$(($RET + $?)) ; echo $MSG "DONE"
- MSG='Doctests arrays/string_.py' ; echo $MSG
- pytest -q --doctest-modules pandas/core/arrays/string_.py
+ MSG='Doctests arrays'; echo $MSG
+ pytest -q --doctest-modules \
+ pandas/core/arrays/string_.py \
+ pandas/core/arrays/integer.py \
+ pandas/core/arrays/boolean.py
RET=$(($RET + $?)) ; echo $MSG "DONE"
MSG='Doctests arrays/boolean.py' ; echo $MSG
@@ -294,8 +314,8 @@ fi
### DOCSTRINGS ###
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
- MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo $MSG
- $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05
+ MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03, SA05)' ; echo $MSG
+ $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05
RET=$(($RET + $?)) ; echo $MSG "DONE"
fi
diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml
index 4f4c4524cb4dd..810554632a507 100644
--- a/ci/deps/azure-36-locale.yaml
+++ b/ci/deps/azure-36-locale.yaml
@@ -9,6 +9,7 @@ dependencies:
- cython>=0.29.13
- pytest>=5.0.1
- pytest-xdist>=1.21
+ - pytest-asyncio
- hypothesis>=3.58.0
- pytest-azurepipelines
@@ -26,7 +27,7 @@ dependencies:
- openpyxl
# lowest supported version of pyarrow (putting it here instead of in
# azure-36-minimum_versions because it needs numpy >= 1.14)
- - pyarrow=0.12
+ - pyarrow=0.13
- pytables
- python-dateutil
- pytz
diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml
index 2bb2b00319382..48ac50c001715 100644
--- a/ci/deps/azure-36-locale_slow.yaml
+++ b/ci/deps/azure-36-locale_slow.yaml
@@ -13,7 +13,7 @@ dependencies:
- pytest-azurepipelines
# pandas dependencies
- - beautifulsoup4==4.6.0
+ - beautifulsoup4=4.6.0
- bottleneck=1.2.*
- lxml
- matplotlib=2.2.2
diff --git a/ci/deps/azure-36-minimum_versions.yaml b/ci/deps/azure-36-minimum_versions.yaml
index 8bf4f70d18aec..de7e011d9c7ca 100644
--- a/ci/deps/azure-36-minimum_versions.yaml
+++ b/ci/deps/azure-36-minimum_versions.yaml
@@ -17,6 +17,7 @@ dependencies:
- beautifulsoup4=4.6.0
- bottleneck=1.2.1
- jinja2=2.8
+ - numba=0.46.0
- numexpr=2.6.2
- numpy=1.13.3
- openpyxl=2.5.7
diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml
index a10fa0904a451..111ba6b020bc7 100644
--- a/ci/deps/azure-37-locale.yaml
+++ b/ci/deps/azure-37-locale.yaml
@@ -8,6 +8,7 @@ dependencies:
- cython>=0.29.13
- pytest>=5.0.1
- pytest-xdist>=1.21
+ - pytest-asyncio
- hypothesis>=3.58.0
- pytest-azurepipelines
diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml
index f393ed84ecf63..3bbbdb4cf32ad 100644
--- a/ci/deps/azure-macos-36.yaml
+++ b/ci/deps/azure-macos-36.yaml
@@ -22,7 +22,7 @@ dependencies:
- numexpr
- numpy=1.14
- openpyxl
- - pyarrow>=0.12.0
+ - pyarrow>=0.13.0
- pytables
- python-dateutil==2.6.1
- pytz
diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml
index 2bd11c9030325..663c55492e69e 100644
--- a/ci/deps/azure-windows-36.yaml
+++ b/ci/deps/azure-windows-36.yaml
@@ -17,11 +17,12 @@ dependencies:
- bottleneck
- fastparquet>=0.3.2
- matplotlib=3.0.2
+ - numba
- numexpr
- numpy=1.15.*
- openpyxl
- jinja2
- - pyarrow>=0.12.0
+ - pyarrow>=0.13.0
- pytables
- python-dateutil
- pytz
diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml
index 928896efd5fc4..62be1075b3337 100644
--- a/ci/deps/azure-windows-37.yaml
+++ b/ci/deps/azure-windows-37.yaml
@@ -24,6 +24,7 @@ dependencies:
- numexpr
- numpy=1.14.*
- openpyxl
+ - pyarrow=0.14
- pytables
- python-dateutil
- pytz
diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml
index c1403f8eb8409..a46001c58d165 100644
--- a/ci/deps/travis-36-cov.yaml
+++ b/ci/deps/travis-36-cov.yaml
@@ -30,10 +30,8 @@ dependencies:
- openpyxl<=3.0.1
# https://github.com/pandas-dev/pandas/pull/30009 openpyxl 3.0.2 broke
- pandas-gbq
- # https://github.com/pydata/pandas-gbq/issues/271
- - google-cloud-bigquery<=1.11
- psycopg2
- - pyarrow>=0.12.0
+ - pyarrow>=0.13.0
- pymysql
- pytables
- python-snappy
diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml
index 6826a9d072ff3..73e2c20b31438 100644
--- a/ci/deps/travis-37.yaml
+++ b/ci/deps/travis-37.yaml
@@ -20,6 +20,7 @@ dependencies:
- pyarrow
- pytz
- s3fs
+ - tabulate
- pyreadstat
- pip
- pip:
diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml
index 828f02596a70e..a627b7edc175f 100644
--- a/ci/deps/travis-38.yaml
+++ b/ci/deps/travis-38.yaml
@@ -17,3 +17,4 @@ dependencies:
- nomkl
- pytz
- pip
+ - tabulate==0.8.3
diff --git a/ci/print_skipped.py b/ci/print_skipped.py
index 72822fa2d3c7f..60e2f047235e6 100755
--- a/ci/print_skipped.py
+++ b/ci/print_skipped.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import os
import xml.etree.ElementTree as et
diff --git a/ci/run_tests.sh b/ci/run_tests.sh
index 0b68164e5767e..0cb1f4aabf352 100755
--- a/ci/run_tests.sh
+++ b/ci/run_tests.sh
@@ -5,17 +5,6 @@
# https://github.com/pytest-dev/pytest/issues/1075
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
-if [ -n "$LOCALE_OVERRIDE" ]; then
- export LC_ALL="$LOCALE_OVERRIDE"
- export LANG="$LOCALE_OVERRIDE"
- PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
- if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
- echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
- # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
- # exit 1
- fi
-fi
-
if [[ "not network" == *"$PATTERN"* ]]; then
export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
fi
@@ -25,14 +14,14 @@ if [ "$COVERAGE" ]; then
COVERAGE="-s --cov=pandas --cov-report=xml:$COVERAGE_FNAME"
fi
-PYTEST_CMD="pytest -m \"$PATTERN\" -n auto --dist=loadfile -s --strict --durations=10 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
-
-# Travis does not have have an X server
-if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
- DISPLAY=DISPLAY=:99.0
- PYTEST_CMD="xvfb-run -e /dev/stdout $PYTEST_CMD"
+# If no X server is found, we use xvfb to emulate it
+if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
+ export DISPLAY=":0"
+ XVFB="xvfb-run "
fi
+PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n auto --dist=loadfile -s --strict --durations=10 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
+
echo $PYTEST_CMD
sh -c "$PYTEST_CMD"
diff --git a/ci/setup_env.sh b/ci/setup_env.sh
index 08ba83ae94451..e5bee09fe2f79 100755
--- a/ci/setup_env.sh
+++ b/ci/setup_env.sh
@@ -1,15 +1,15 @@
#!/bin/bash -e
# edit the locale file if needed
-if [ -n "$LOCALE_OVERRIDE" ]; then
+if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
echo "Adding locale to the first line of pandas/__init__.py"
rm -f pandas/__init__.pyc
- SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
+ SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
sed -i "$SEDC" pandas/__init__.py
+
echo "[head -4 pandas/__init__.py]"
head -4 pandas/__init__.py
echo
- sudo locale-gen "$LOCALE_OVERRIDE"
fi
MINICONDA_DIR="$HOME/miniconda3"
@@ -114,6 +114,11 @@ echo "remove postgres if has been installed with conda"
echo "we use the one from the CI"
conda remove postgresql -y --force || true
+echo
+echo "remove qt"
+echo "causes problems with the clipboard, we use xsel for that"
+conda remove qt -y --force || true
+
echo
echo "conda list pandas"
conda list pandas
@@ -121,7 +126,7 @@ conda list pandas
# Make sure any error below is reported as such
echo "[Build extensions]"
-python setup.py build_ext -q -i
+python setup.py build_ext -q -i -j2
# XXX: Some of our environments end up with old versions of pip (10.x)
# Adding a new enough version of pip to the requirements explodes the
diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
index f92090fecccf3..47f63c11d0567 100644
--- a/conda.recipe/meta.yaml
+++ b/conda.recipe/meta.yaml
@@ -36,5 +36,5 @@ test:
about:
- home: http://pandas.pydata.org
+ home: https://pandas.pydata.org
license: BSD
diff --git a/doc/make.py b/doc/make.py
index cf73f44b5dd02..024a748cd28ca 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
"""
Python script for building documentation.
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 481c03ab8f388..7f24d02a496e1 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -10,6 +10,7 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
+from datetime import datetime
import importlib
import inspect
import logging
@@ -137,7 +138,7 @@
# General information about the project.
project = "pandas"
-copyright = "2008-2014, the pandas development team"
+copyright = f"2008-{datetime.now().year}, the pandas development team"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst
new file mode 100644
index 0000000000000..a295038b5a0bd
--- /dev/null
+++ b/doc/source/development/code_style.rst
@@ -0,0 +1,155 @@
+.. _code_style:
+
+{{ header }}
+
+=======================
+pandas code style guide
+=======================
+
+.. contents:: Table of contents:
+ :local:
+
+Patterns
+========
+
+foo.__class__
+-------------
+
+*pandas* uses 'type(foo)' instead of 'foo.__class__' as it makes the code more
+readable.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+ foo = "bar"
+ type(foo)
+
+**Bad:**
+
+.. code-block:: python
+
+ foo = "bar"
+ foo.__class__
+
+
+String formatting
+=================
+
+Concatenated strings
+--------------------
+
+f-strings
+~~~~~~~~~
+
+*pandas* uses f-strings formatting instead of '%' and '.format()' string formatters.
+
+The convention when using f-strings on a string that is concatenated over several
+lines is to prefix only the lines containing values that need to be interpreted.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+ foo = "old_function"
+ bar = "new_function"
+
+ my_warning_message = (
+ f"Warning, {foo} is deprecated, "
+ "please use the new and way better "
+ f"{bar}"
+ )
+
+**Bad:**
+
+.. code-block:: python
+
+ foo = "old_function"
+ bar = "new_function"
+
+ my_warning_message = (
+ f"Warning, {foo} is deprecated, "
+ f"please use the new and way better "
+ f"{bar}"
+ )
+
+White spaces
+~~~~~~~~~~~~
+
+Put the whitespace only at the end of the previous line, so
+there is no whitespace at the beginning of the concatenated string.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+ example_string = (
+ "Some long concatenated string, "
+ "with good placement of the "
+ "whitespaces"
+ )
+
+**Bad:**
+
+.. code-block:: python
+
+ example_string = (
+ "Some long concatenated string,"
+ " with bad placement of the"
+ " whitespaces"
+ )
+
+Representation function (aka 'repr()')
+--------------------------------------
+
+*pandas* uses 'repr()' instead of '%r' and '!r'.
+
+The use of 'repr()' should only happen when the value is not an obvious string.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+ value = str
+ f"Unknown recived value, got: {repr(value)}"
+
+**Good:**
+
+.. code-block:: python
+
+ value = str
+ f"Unknown recived type, got: '{type(value).__name__}'"
+
+
+Imports (aim for absolute)
+==========================
+
+In Python 3, absolute imports are recommended. With absolute imports, something
+like ``import string`` imports the string module rather than a ``string.py``
+in the same directory. As much as possible, you should try to write out
+absolute imports that show the whole import chain from the top-level pandas.
+
+Explicit relative imports are also supported in Python 3, but their use is not
+recommended. Implicit relative imports should never be used and were removed
+in Python 3.
+
+For example:
+
+::
+
+ # preferred
+ import pandas.core.common as com
+
+ # not preferred
+ from .common import test_base
+
+ # wrong
+ from common import test_base
diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst
index d7b3e159f8ce7..b650b2a2cf1fe 100644
--- a/doc/source/development/contributing.rst
+++ b/doc/source/development/contributing.rst
@@ -146,6 +146,17 @@ requires a C compiler and Python environment. If you're making documentation
changes, you can skip to :ref:`contributing.documentation` but you won't be able
to build the documentation locally before pushing your changes.
+Using a Docker Container
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Instead of manually setting up a development environment, you can use Docker to
+automatically create the environment with just a few commands. Pandas provides a `Dockerfile`
+in the root directory to build a Docker image with a full pandas development environment.
+
+Even easier, you can use the Dockerfile to launch a remote session with Visual Studio Code,
+a popular free IDE, using the `.devcontainer.json` file.
+See https://code.visualstudio.com/docs/remote/containers for details.
+
.. _contributing.dev_c:
Installing a C compiler
@@ -354,9 +365,9 @@ About the *pandas* documentation
--------------------------------
The documentation is written in **reStructuredText**, which is almost like writing
-in plain English, and built using `Sphinx `__. The
+in plain English, and built using `Sphinx `__. The
Sphinx Documentation has an excellent `introduction to reST
-`__. Review the Sphinx docs to perform more
+`__. Review the Sphinx docs to perform more
complex changes to the documentation as well.
Some other important things to know about the docs:
@@ -434,7 +445,7 @@ The utility script ``scripts/validate_docstrings.py`` can be used to get a csv
summary of the API documentation. And also validate common errors in the docstring
of a specific class, function or method. The summary also compares the list of
methods documented in ``doc/source/api.rst`` (which is used to generate
-the `API Reference `_ page)
+the `API Reference `_ page)
and the actual public methods.
This will identify methods documented in ``doc/source/api.rst`` that are not actually
class methods, and existing methods that are not documented in ``doc/source/api.rst``.
@@ -569,8 +580,7 @@ do not make sudden changes to the code that could have the potential to break
a lot of user code as a result, that is, we need it to be as *backwards compatible*
as possible to avoid mass breakages.
-Additional standards are outlined on the `code style wiki
-page `_.
+Additional standards are outlined on the `pandas code style guide `_
Optional dependencies
---------------------
@@ -636,6 +646,8 @@ many errors as possible, but it may not correct *all* of them. Thus, it is
recommended that you run ``cpplint`` to double check and make any other style
fixes manually.
+.. _contributing.code-formatting:
+
Python (PEP8 / black)
~~~~~~~~~~~~~~~~~~~~~
@@ -657,19 +669,8 @@ apply ``black`` as you edit files.
You should use a ``black`` version >= 19.10b0 as previous versions are not compatible
with the pandas codebase.
-Optionally, you may wish to setup `pre-commit hooks `_
-to automatically run ``black`` and ``flake8`` when you make a git commit. This
-can be done by installing ``pre-commit``::
-
- pip install pre-commit
-
-and then running::
-
- pre-commit install
-
-from the root of the pandas repository. Now ``black`` and ``flake8`` will be run
-each time you commit changes. You can skip these checks with
-``git commit --no-verify``.
+If you wish to run these checks automatically, we encourage you to use
+:ref:`pre-commits ` instead.
One caveat about ``git diff upstream/master -u -- "*.py" | flake8 --diff``: this
command will catch any stylistic errors in your changes specifically, but
@@ -677,7 +678,7 @@ be beware it may not catch all of them. For example, if you delete the only
usage of an imported function, it is stylistically incorrect to import an
unused function. However, style-checking the diff will not catch this because
the actual import is not part of the diff. Thus, for completeness, you should
-run this command, though it will take longer::
+run this command, though it may take longer::
git diff upstream/master --name-only -- "*.py" | xargs -r flake8
@@ -695,6 +696,8 @@ behaviour as follows::
This will get all the files being changed by the PR (and ending with ``.py``),
and run ``flake8`` on them, one after the other.
+Note that these commands can be run analogously with ``black``.
+
.. _contributing.import-formatting:
Import formatting
@@ -717,7 +720,6 @@ A summary of our current import sections ( in order ):
Imports are alphabetically sorted within these sections.
-
As part of :ref:`Continuous Integration ` checks we run::
isort --recursive --check-only pandas
@@ -741,8 +743,37 @@ to automatically format imports correctly. This will modify your local copy of t
The `--recursive` flag can be passed to sort all files in a directory.
+Alternatively, you can run a command similar to what was suggested for ``black`` and ``flake8`` :ref:`right above `::
+
+ git diff upstream/master --name-only -- "*.py" | xargs -r isort
+
+The same caveats apply if you are on OSX or Windows.
+
You can then verify the changes look ok, then git :ref:`commit ` and :ref:`push `.
+.. _contributing.pre-commit:
+
+Pre-Commit
+~~~~~~~~~~
+
+You can run many of these styling checks manually as we have described above. However,
+we encourage you to use `pre-commit hooks `_ instead
+to automatically run ``black``, ``flake8``, ``isort`` when you make a git commit. This
+can be done by installing ``pre-commit``::
+
+ pip install pre-commit
+
+and then running::
+
+ pre-commit install
+
+from the root of the pandas repository. Now all of the styling checks will be
+run each time you commit changes without your needing to run each one manually.
+In addition, using this pre-commit hook will also allow you to more easily
+remain up-to-date with our code checks as they change.
+
+Note that if needed, you can skip these checks with ``git commit --no-verify``.
+
Backwards compatibility
~~~~~~~~~~~~~~~~~~~~~~~
@@ -957,7 +988,7 @@ inspiration. If your test requires working with files or
network connectivity, there is more information on the `testing page
`_ of the wiki.
-The ``pandas.util.testing`` module has many special ``assert`` functions that
+The ``pandas._testing`` module has many special ``assert`` functions that
make it easier to make statements about whether Series or DataFrame objects are
equivalent. The easiest way to verify that your code is correct is to
explicitly construct the result you expect, then compare the actual result to
@@ -1143,7 +1174,7 @@ If your change involves checking that a warning is actually emitted, use
.. code-block:: python
- import pandas.util.testing as tm
+ import pandas._testing as tm
df = pd.DataFrame()
@@ -1364,6 +1395,7 @@ some common prefixes along with general guidelines for when to use them:
* TST: Additions/updates to tests
* BLD: Updates to the build process/scripts
* PERF: Performance improvement
+* TYP: Type annotations
* CLN: Code cleanup
The following defines how a commit message should be structured. Please reference the
@@ -1504,3 +1536,19 @@ The branch will still exist on GitHub, so to delete it there do::
git push origin --delete shiny-new-feature
.. _Gitter: https://gitter.im/pydata/pandas
+
+
+Tips for a successful Pull Request
+==================================
+
+If you have made it to the `Review your code`_ phase, one of the core contributors may
+take a look. Please note however that a handful of people are responsible for reviewing
+all of the contributions, which can often lead to bottlenecks.
+
+To improve the chances of your pull request being reviewed, you should:
+
+- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
+- **Ensure you have appropriate tests**. These should be the first part of any PR
+- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
+- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
+- **Keep** `Updating your pull request`_, either by request or every few days
diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
index 34bc5f44eb0c0..cb32f0e1ee475 100644
--- a/doc/source/development/contributing_docstring.rst
+++ b/doc/source/development/contributing_docstring.rst
@@ -22,39 +22,39 @@ Next example gives an idea on how a docstring looks like:
.. code-block:: python
def add(num1, num2):
- """
- Add up two integer numbers.
-
- This function simply wraps the `+` operator, and does not
- do anything interesting, except for illustrating what is
- the docstring of a very simple function.
-
- Parameters
- ----------
- num1 : int
- First number to add
- num2 : int
- Second number to add
-
- Returns
- -------
- int
- The sum of `num1` and `num2`
-
- See Also
- --------
- subtract : Subtract one integer from another
-
- Examples
- --------
- >>> add(2, 2)
- 4
- >>> add(25, 0)
- 25
- >>> add(10, -10)
- 0
- """
- return num1 + num2
+ """
+ Add up two integer numbers.
+
+ This function simply wraps the `+` operator, and does not
+ do anything interesting, except for illustrating what is
+ the docstring of a very simple function.
+
+ Parameters
+ ----------
+ num1 : int
+ First number to add
+ num2 : int
+ Second number to add
+
+ Returns
+ -------
+ int
+ The sum of `num1` and `num2`
+
+ See Also
+ --------
+ subtract : Subtract one integer from another
+
+ Examples
+ --------
+ >>> add(2, 2)
+ 4
+ >>> add(25, 0)
+ 25
+ >>> add(10, -10)
+ 0
+ """
+ return num1 + num2
Some standards exist about docstrings, so they are easier to read, and they can
be exported to other formats such as html or pdf.
@@ -399,7 +399,7 @@ DataFrame:
* DataFrame
* pandas.Index
* pandas.Categorical
-* pandas.SparseArray
+* pandas.arrays.SparseArray
If the exact type is not relevant, but must be compatible with a numpy
array, array-like can be specified. If Any type that can be iterated is
diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst
index 757b197c717e6..f8a6bb6deb52d 100644
--- a/doc/source/development/index.rst
+++ b/doc/source/development/index.rst
@@ -13,6 +13,7 @@ Development
:maxdepth: 2
contributing
+ code_style
maintaining
internals
extending
diff --git a/doc/source/development/roadmap.rst b/doc/source/development/roadmap.rst
index 00598830e2fe9..fafe63d80249c 100644
--- a/doc/source/development/roadmap.rst
+++ b/doc/source/development/roadmap.rst
@@ -129,20 +129,6 @@ Some specific goals include
* Improve the overall organization of the documentation and specific subsections
of the documentation to make navigation and finding content easier.
-Package docstring validation
-----------------------------
-
-To improve the quality and consistency of pandas docstrings, we've developed
-tooling to check docstrings in a variety of ways.
-https://github.com/pandas-dev/pandas/blob/master/scripts/validate_docstrings.py
-contains the checks.
-
-Like many other projects, pandas uses the
-`numpydoc `__ style for writing
-docstrings. With the collaboration of the numpydoc maintainers, we'd like to
-move the checks to a package other than pandas so that other projects can easily
-use them as well.
-
Performance monitoring
----------------------
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index 48c722bc16a86..90f839897ce4b 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -41,6 +41,16 @@ Pyjanitor provides a clean API for cleaning data, using method chaining.
Engarde is a lightweight library used to explicitly state assumptions about your datasets
and check that they're *actually* true.
+`pandas-path `__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since Python 3.4, `pathlib `_ has been
+included in the Python standard library. Path objects provide a simple
+and delightful way to interact with the file system. The pandas-path package enables the
+Path API for pandas through a custom accessor ``.path``. Getting just the filenames from
+a series of full file paths is as simple as ``my_files.path.name``. Other convenient operations like
+joining paths, replacing file extensions, and checking if files exist are also available.
+
.. _ecosystem.stats:
Statistics and machine learning
@@ -112,16 +122,14 @@ also goes beyond matplotlib and pandas with the option to perform statistical
estimation while plotting, aggregating across observations and visualizing the
fit of statistical models to emphasize patterns in a dataset.
-`yhat/ggpy `__
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`plotnine `__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Hadley Wickham's `ggplot2 `__ is a foundational exploratory visualization package for the R language.
Based on `"The Grammar of Graphics" `__ it
provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data.
-It's really quite incredible. Various implementations to other languages are available,
-but a faithful implementation for Python users has long been missing. Although still young
-(as of Jan-2014), the `yhat/ggpy `__ project has been
-progressing quickly in that direction.
+Various implementations in other languages are available.
+A good implementation for Python users is `has2k1/plotnine `__.
`IPython Vega `__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -244,8 +252,8 @@ Pandas DataFrames with timeseries indexes.
`pydatastream `__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PyDatastream is a Python interface to the
-`Thomson Dataworks Enterprise (DWE/Datastream) `__
-SOAP API to return indexed Pandas DataFrames with financial data.
+`Refinitiv Datastream (DWS) `__
+REST API to return indexed Pandas DataFrames with financial data.
This package requires valid credentials for this API (non-free).
`pandaSDMX `__
@@ -327,6 +335,21 @@ PyTables, h5py, and pymongo to move data between non pandas formats. Its graph
based approach is also extensible by end users for custom formats that may be
too specific for the core of odo.
+`Pandarallel `__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
+It also displays progress bars.
+
+.. code:: python
+
+ from pandarallel import pandarallel
+
+ pandarallel.initialize(progress_bar=True)
+
+ # equivalent to df.apply(func), but computed in parallel
+ df.parallel_apply(func)
+
`Ray `__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -371,13 +394,16 @@ A directory of projects providing
:ref:`extension accessors `. This is for users to
discover new accessors and for library authors to coordinate on the namespace.
-============== ========== =========================
-Library        Accessor   Classes
-============== ========== =========================
-`cyberpandas`_ ``ip``     ``Series``
-`pdvega`_      ``vgplot`` ``Series``, ``DataFrame``
-============== ========== =========================
+=============== ========== ========================= ===============================================================
+Library         Accessor   Classes                   Description
+=============== ========== ========================= ===============================================================
+`cyberpandas`_  ``ip``     ``Series``                Provides common operations for working with IP addresses.
+`pdvega`_       ``vgplot`` ``Series``, ``DataFrame`` Provides plotting functions from the Altair_ library.
+`pandas_path`_  ``path``   ``Index``, ``Series``     Provides `pathlib.Path`_ functions for Series.
+=============== ========== ========================= ===============================================================
.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest
.. _pdvega: https://altair-viz.github.io/pdvega/
-
+.. _Altair: https://altair-viz.github.io/
+.. _pandas_path: https://github.com/drivendataorg/pandas-path/
+.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html
\ No newline at end of file
diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst
index 66e500131b316..3055a22129b91 100644
--- a/doc/source/getting_started/10min.rst
+++ b/doc/source/getting_started/10min.rst
@@ -697,8 +697,9 @@ Plotting
See the :ref:`Plotting ` docs.
+We use the standard convention for referencing the matplotlib API:
+
.. ipython:: python
- :suppress:
import matplotlib.pyplot as plt
plt.close('all')
diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index d489d35dc1226..4fef5efbd1551 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -1937,21 +1937,36 @@ See :ref:`extending.extension-types` for how to write your own extension that
works with pandas. See :ref:`ecosystem.extensions` for a list of third-party
libraries that have implemented an extension.
-The following table lists all of pandas extension types. See the respective
+The following table lists all of pandas extension types. For methods requiring ``dtype``
+arguments, strings can be specified as indicated. See the respective
documentation sections for more on each type.
-=================== ========================= ================== ============================= =============================
-Kind of Data Data Type Scalar Array Documentation
-=================== ========================= ================== ============================= =============================
-tz-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone`
-Categorical :class:`CategoricalDtype` (none) :class:`Categorical` :ref:`categorical`
-period (time spans) :class:`PeriodDtype` :class:`Period` :class:`arrays.PeriodArray` :ref:`timeseries.periods`
-sparse :class:`SparseDtype` (none) :class:`arrays.SparseArray` :ref:`sparse`
-intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
-nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na`
-Strings :class:`StringDtype` :class:`str` :class:`arrays.StringArray` :ref:`text`
-Boolean (with NA) :class:`BooleanDtype` :class:`bool` :class:`arrays.BooleanArray` :ref:`api.arrays.bool`
-=================== ========================= ================== ============================= =============================
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| Kind of Data      | Data Type                 | Scalar             | Array                         | String Aliases                          | Documentation                 |
++===================+===========================+====================+===============================+=========================================+===============================+
+| tz-aware datetime | :class:`DatetimeTZDtype`  | :class:`Timestamp` | :class:`arrays.DatetimeArray` | ``'datetime64[ns, <tz>]'``              | :ref:`timeseries.timezone`    |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| Categorical       | :class:`CategoricalDtype` | (none)             | :class:`Categorical`          | ``'category'``                          | :ref:`categorical`            |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| period            | :class:`PeriodDtype`      | :class:`Period`    | :class:`arrays.PeriodArray`   | ``'period[<freq>]'``,                   | :ref:`timeseries.periods`     |
+| (time spans)      |                           |                    |                               | ``'Period[<freq>]'``                    |                               |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| sparse            | :class:`SparseDtype`      | (none)             | :class:`arrays.SparseArray`   | ``'Sparse'``, ``'Sparse[int]'``,        | :ref:`sparse`                 |
+|                   |                           |                    |                               | ``'Sparse[float]'``                     |                               |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| intervals         | :class:`IntervalDtype`    | :class:`Interval`  | :class:`arrays.IntervalArray` | ``'interval'``, ``'Interval'``,         | :ref:`advanced.intervalindex` |
+|                   |                           |                    |                               | ``'Interval[<numpy_dtype>]'``,          |                               |
+|                   |                           |                    |                               | ``'Interval[datetime64[ns, <tz>]]'``,   |                               |
+|                   |                           |                    |                               | ``'Interval[timedelta64[<freq>]]'``     |                               |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| nullable integer  | :class:`Int64Dtype`, ...  | (none)             | :class:`arrays.IntegerArray`  | ``'Int8'``, ``'Int16'``, ``'Int32'``,   | :ref:`integer_na`             |
+|                   |                           |                    |                               | ``'Int64'``, ``'UInt8'``, ``'UInt16'``, |                               |
+|                   |                           |                    |                               | ``'UInt32'``, ``'UInt64'``              |                               |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| Strings           | :class:`StringDtype`      | :class:`str`       | :class:`arrays.StringArray`   | ``'string'``                            | :ref:`text`                   |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
+| Boolean (with NA) | :class:`BooleanDtype`     | :class:`bool`      | :class:`arrays.BooleanArray`  | ``'boolean'``                           | :ref:`api.arrays.bool`        |
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
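+
+For example, these string aliases can be passed anywhere a ``dtype`` argument
+is accepted (a brief sketch):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    pd.array([1, 2, None], dtype="Int64")         # nullable integer
+    pd.Series(["a", "b", "a"], dtype="category")  # categorical
+    pd.Series([True, None], dtype="boolean")      # nullable boolean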
Pandas has two ways to store strings.
diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst
index 69bb700c97b15..4e284fe7b5968 100644
--- a/doc/source/getting_started/comparison/comparison_with_sas.rst
+++ b/doc/source/getting_started/comparison/comparison_with_sas.rst
@@ -629,7 +629,7 @@ for more details and examples.
.. ipython:: python
- tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
+ tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst
index db687386329bb..fec6bae1e0330 100644
--- a/doc/source/getting_started/comparison/comparison_with_stata.rst
+++ b/doc/source/getting_started/comparison/comparison_with_stata.rst
@@ -617,7 +617,7 @@ for more details and examples.
.. ipython:: python
- tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
+ tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst
index a07fcbd8b67c4..81a2f0ae7d162 100644
--- a/doc/source/getting_started/dsintro.rst
+++ b/doc/source/getting_started/dsintro.rst
@@ -136,7 +136,7 @@ Like a NumPy array, a pandas Series has a :attr:`~Series.dtype`.
This is often a NumPy dtype. However, pandas and 3rd-party libraries
extend NumPy's type system in a few places, in which case the dtype would
-be a :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within
+be an :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within
pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`basics.dtypes`
for more.
@@ -676,11 +676,11 @@ similar to an ndarray:
# only show the first 5 rows
df[:5].T
+.. _dsintro.numpy_interop:
+
DataFrame interoperability with NumPy functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. _dsintro.numpy_interop:
-
Elementwise NumPy ufuncs (log, exp, sqrt, ...) and various other NumPy functions
can be used with no issues on Series and DataFrame, assuming the data within
are numeric:
@@ -741,7 +741,7 @@ implementation takes precedence and a Series is returned.
np.maximum(ser, idx)
NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays,
-for example :class:`SparseArray` (see :ref:`sparse.calculation`). If possible,
+for example :class:`arrays.SparseArray` (see :ref:`sparse.calculation`). If possible,
the ufunc is applied without converting the underlying data to an ndarray.
Console display
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 62a39fb5176f9..b3fd443e662a9 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -234,7 +234,8 @@ Optional dependencies
~~~~~~~~~~~~~~~~~~~~~
Pandas has many optional dependencies that are only used for specific methods.
-For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the
+For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
+:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. If the
optional dependency is not installed, pandas will raise an ``ImportError`` when
the method requiring that dependency is called.
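+
+For instance, a quick sketch (``tabulate`` must be installed for the call to
+succeed; otherwise pandas raises ``ImportError``):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+    print(df.to_markdown())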
@@ -255,6 +256,7 @@ gcsfs 0.2.2 Google Cloud Storage access
html5lib HTML parser for read_html (see :ref:`note `)
lxml 3.8.0 HTML parser for read_html (see :ref:`note `)
matplotlib 2.2.2 Visualization
+numba 0.46.0 Alternative execution engine for rolling operations
openpyxl 2.5.7 Reading / writing for xlsx files
pandas-gbq 0.8.0 Google Big Query access
psycopg2 PostgreSQL engine for sqlalchemy
@@ -264,6 +266,7 @@ pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
qtpy Clipboard I/O
s3fs 0.3.0 Amazon S3 access
+tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
xarray 0.8.2 pandas-like API for N-dimensional data
xclip Clipboard I/O on linux
xlrd 1.1.0 Excel reading
@@ -301,3 +304,4 @@ top-level :func:`~pandas.read_html` function:
.. _html5lib: https://github.com/html5lib/html5lib-python
.. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup
.. _lxml: http://lxml.de
+.. _tabulate: https://github.com/astanin/python-tabulate
diff --git a/doc/source/getting_started/tutorials.rst b/doc/source/getting_started/tutorials.rst
index 212f3636d0a98..1ed0e8f635b58 100644
--- a/doc/source/getting_started/tutorials.rst
+++ b/doc/source/getting_started/tutorials.rst
@@ -15,7 +15,7 @@ pandas' own :ref:`10 Minutes to pandas<10min>`.
More complex recipes are in the :ref:`Cookbook`.
-A handy pandas `cheat sheet `_.
+A handy pandas `cheat sheet `_.
Community guides
================
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 9cea68530fbe7..4ced92cbda81a 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -39,7 +39,7 @@ See the :ref:`overview` for more detail about what's in the library.
:hidden:
{% endif %}
{% if not single_doc %}
- What's New in 1.0.0
+ What's New in 1.1.0
getting_started/index
user_guide/index
{% endif -%}
@@ -51,7 +51,7 @@ See the :ref:`overview` for more detail about what's in the library.
whatsnew/index
{% endif %}
-* :doc:`whatsnew/v1.0.0`
+* :doc:`whatsnew/v1.1.0`
* :doc:`getting_started/index`
* :doc:`getting_started/install`
@@ -109,6 +109,7 @@ See the :ref:`overview` for more detail about what's in the library.
* :doc:`development/index`
* :doc:`development/contributing`
+ * :doc:`development/code_style`
* :doc:`development/internals`
* :doc:`development/extending`
* :doc:`development/developer`
diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst
index cf14d28772f4c..c71350ecd73b3 100644
--- a/doc/source/reference/arrays.rst
+++ b/doc/source/reference/arrays.rst
@@ -12,7 +12,8 @@ For most data types, pandas uses NumPy arrays as the concrete
objects contained with a :class:`Index`, :class:`Series`, or
:class:`DataFrame`.
-For some data types, pandas extends NumPy's type system.
+For some data types, pandas extends NumPy's type system. String aliases for these types
+can be found at :ref:`basics.dtypes`.
=================== ========================= ================== =============================
Kind of Data        Pandas Data Type          Scalar             Array
@@ -443,13 +444,13 @@ Sparse data
-----------
Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may
-be stored efficiently as a :class:`SparseArray`.
+be stored efficiently as an :class:`arrays.SparseArray`.
.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst
- SparseArray
+ arrays.SparseArray
.. autosummary::
:toctree: api/
diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
index 4b1a99da7cd4c..c072237850d82 100644
--- a/doc/source/reference/extensions.rst
+++ b/doc/source/reference/extensions.rst
@@ -59,3 +59,16 @@ objects.
api.extensions.ExtensionArray.nbytes
api.extensions.ExtensionArray.ndim
api.extensions.ExtensionArray.shape
+
+Additionally, we have some utility methods for ensuring your object
+behaves correctly.
+
+.. autosummary::
+ :toctree: api/
+
+ api.indexers.check_bool_array_indexer
+
+
+The sentinel ``pandas.api.extensions.no_default`` is used as the default
+value in some methods. Use an ``is`` comparison to check if the user
+provides a non-default value.
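+
+A minimal sketch of the pattern (``my_method`` is a hypothetical example, not
+a pandas API):
+
+.. code-block:: python
+
+    from pandas.api.extensions import no_default
+
+    def my_method(value=no_default):
+        if value is no_default:
+            # the caller did not pass ``value``
+            value = 0
+        return value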
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index 815f3f9c19d49..01aa6c60e3b2f 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -273,6 +273,8 @@ Metadata
:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame.
+.. warning:: ``DataFrame.attrs`` is considered experimental and may change without warning.
+
.. autosummary::
:toctree: api/
@@ -361,4 +363,5 @@ Serialization / IO / conversion
DataFrame.to_records
DataFrame.to_string
DataFrame.to_clipboard
+ DataFrame.to_markdown
DataFrame.style
diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst
index 0961acc43f301..0d9e0b0f4c668 100644
--- a/doc/source/reference/general_utility_functions.rst
+++ b/doc/source/reference/general_utility_functions.rst
@@ -18,6 +18,8 @@ Working with options
set_option
option_context
+.. _api.general.testing:
+
Testing functions
-----------------
.. autosummary::
@@ -26,6 +28,7 @@ Testing functions
testing.assert_frame_equal
testing.assert_series_equal
testing.assert_index_equal
+ testing.assert_extension_array_equal
Exceptions and warnings
-----------------------
diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst
index 4a58055f1c955..fc1c6d6bd6d47 100644
--- a/doc/source/reference/offset_frequency.rst
+++ b/doc/source/reference/offset_frequency.rst
@@ -35,6 +35,8 @@ Methods
DateOffset.copy
DateOffset.isAnchored
DateOffset.onOffset
+ DateOffset.is_anchored
+ DateOffset.is_on_offset
BusinessDay
-----------
@@ -65,6 +67,8 @@ Methods
BusinessDay.copy
BusinessDay.isAnchored
BusinessDay.onOffset
+ BusinessDay.is_anchored
+ BusinessDay.is_on_offset
BusinessHour
------------
@@ -94,6 +98,8 @@ Methods
BusinessHour.copy
BusinessHour.isAnchored
BusinessHour.onOffset
+ BusinessHour.is_anchored
+ BusinessHour.is_on_offset
CustomBusinessDay
-----------------
@@ -123,6 +129,8 @@ Methods
CustomBusinessDay.copy
CustomBusinessDay.isAnchored
CustomBusinessDay.onOffset
+ CustomBusinessDay.is_anchored
+ CustomBusinessDay.is_on_offset
CustomBusinessHour
------------------
@@ -152,6 +160,8 @@ Methods
CustomBusinessHour.copy
CustomBusinessHour.isAnchored
CustomBusinessHour.onOffset
+ CustomBusinessHour.is_anchored
+ CustomBusinessHour.is_on_offset
MonthOffset
-----------
@@ -182,6 +192,8 @@ Methods
MonthOffset.copy
MonthOffset.isAnchored
MonthOffset.onOffset
+ MonthOffset.is_anchored
+ MonthOffset.is_on_offset
MonthEnd
--------
@@ -212,6 +224,8 @@ Methods
MonthEnd.copy
MonthEnd.isAnchored
MonthEnd.onOffset
+ MonthEnd.is_anchored
+ MonthEnd.is_on_offset
MonthBegin
----------
@@ -242,6 +256,8 @@ Methods
MonthBegin.copy
MonthBegin.isAnchored
MonthBegin.onOffset
+ MonthBegin.is_anchored
+ MonthBegin.is_on_offset
BusinessMonthEnd
----------------
@@ -272,6 +288,8 @@ Methods
BusinessMonthEnd.copy
BusinessMonthEnd.isAnchored
BusinessMonthEnd.onOffset
+ BusinessMonthEnd.is_anchored
+ BusinessMonthEnd.is_on_offset
BusinessMonthBegin
------------------
@@ -302,6 +320,8 @@ Methods
BusinessMonthBegin.copy
BusinessMonthBegin.isAnchored
BusinessMonthBegin.onOffset
+ BusinessMonthBegin.is_anchored
+ BusinessMonthBegin.is_on_offset
CustomBusinessMonthEnd
----------------------
@@ -332,6 +352,8 @@ Methods
CustomBusinessMonthEnd.copy
CustomBusinessMonthEnd.isAnchored
CustomBusinessMonthEnd.onOffset
+ CustomBusinessMonthEnd.is_anchored
+ CustomBusinessMonthEnd.is_on_offset
CustomBusinessMonthBegin
------------------------
@@ -362,6 +384,8 @@ Methods
CustomBusinessMonthBegin.copy
CustomBusinessMonthBegin.isAnchored
CustomBusinessMonthBegin.onOffset
+ CustomBusinessMonthBegin.is_anchored
+ CustomBusinessMonthBegin.is_on_offset
SemiMonthOffset
---------------
@@ -392,6 +416,8 @@ Methods
SemiMonthOffset.copy
SemiMonthOffset.isAnchored
SemiMonthOffset.onOffset
+ SemiMonthOffset.is_anchored
+ SemiMonthOffset.is_on_offset
SemiMonthEnd
------------
@@ -422,6 +448,8 @@ Methods
SemiMonthEnd.copy
SemiMonthEnd.isAnchored
SemiMonthEnd.onOffset
+ SemiMonthEnd.is_anchored
+ SemiMonthEnd.is_on_offset
SemiMonthBegin
--------------
@@ -452,6 +480,8 @@ Methods
SemiMonthBegin.copy
SemiMonthBegin.isAnchored
SemiMonthBegin.onOffset
+ SemiMonthBegin.is_anchored
+ SemiMonthBegin.is_on_offset
Week
----
@@ -482,6 +512,8 @@ Methods
Week.copy
Week.isAnchored
Week.onOffset
+ Week.is_anchored
+ Week.is_on_offset
WeekOfMonth
-----------
@@ -511,6 +543,8 @@ Methods
WeekOfMonth.copy
WeekOfMonth.isAnchored
WeekOfMonth.onOffset
+ WeekOfMonth.is_anchored
+ WeekOfMonth.is_on_offset
LastWeekOfMonth
---------------
@@ -540,6 +574,8 @@ Methods
LastWeekOfMonth.copy
LastWeekOfMonth.isAnchored
LastWeekOfMonth.onOffset
+ LastWeekOfMonth.is_anchored
+ LastWeekOfMonth.is_on_offset
QuarterOffset
-------------
@@ -570,6 +606,8 @@ Methods
QuarterOffset.copy
QuarterOffset.isAnchored
QuarterOffset.onOffset
+ QuarterOffset.is_anchored
+ QuarterOffset.is_on_offset
BQuarterEnd
-----------
@@ -600,6 +638,8 @@ Methods
BQuarterEnd.copy
BQuarterEnd.isAnchored
BQuarterEnd.onOffset
+ BQuarterEnd.is_anchored
+ BQuarterEnd.is_on_offset
BQuarterBegin
-------------
@@ -630,6 +670,8 @@ Methods
BQuarterBegin.copy
BQuarterBegin.isAnchored
BQuarterBegin.onOffset
+ BQuarterBegin.is_anchored
+ BQuarterBegin.is_on_offset
QuarterEnd
----------
@@ -660,6 +702,8 @@ Methods
QuarterEnd.copy
QuarterEnd.isAnchored
QuarterEnd.onOffset
+ QuarterEnd.is_anchored
+ QuarterEnd.is_on_offset
QuarterBegin
------------
@@ -690,6 +734,8 @@ Methods
QuarterBegin.copy
QuarterBegin.isAnchored
QuarterBegin.onOffset
+ QuarterBegin.is_anchored
+ QuarterBegin.is_on_offset
YearOffset
----------
@@ -720,6 +766,8 @@ Methods
YearOffset.copy
YearOffset.isAnchored
YearOffset.onOffset
+ YearOffset.is_anchored
+ YearOffset.is_on_offset
BYearEnd
--------
@@ -750,6 +798,8 @@ Methods
BYearEnd.copy
BYearEnd.isAnchored
BYearEnd.onOffset
+ BYearEnd.is_anchored
+ BYearEnd.is_on_offset
BYearBegin
----------
@@ -780,6 +830,8 @@ Methods
BYearBegin.copy
BYearBegin.isAnchored
BYearBegin.onOffset
+ BYearBegin.is_anchored
+ BYearBegin.is_on_offset
YearEnd
-------
@@ -810,6 +862,8 @@ Methods
YearEnd.copy
YearEnd.isAnchored
YearEnd.onOffset
+ YearEnd.is_anchored
+ YearEnd.is_on_offset
YearBegin
---------
@@ -840,6 +894,8 @@ Methods
YearBegin.copy
YearBegin.isAnchored
YearBegin.onOffset
+ YearBegin.is_anchored
+ YearBegin.is_on_offset
FY5253
------
@@ -871,6 +927,8 @@ Methods
FY5253.get_year_end
FY5253.isAnchored
FY5253.onOffset
+ FY5253.is_anchored
+ FY5253.is_on_offset
FY5253Quarter
-------------
@@ -901,6 +959,8 @@ Methods
FY5253Quarter.get_weeks
FY5253Quarter.isAnchored
FY5253Quarter.onOffset
+ FY5253Quarter.is_anchored
+ FY5253Quarter.is_on_offset
FY5253Quarter.year_has_extra_week
Easter
@@ -931,6 +991,8 @@ Methods
Easter.copy
Easter.isAnchored
Easter.onOffset
+ Easter.is_anchored
+ Easter.is_on_offset
Tick
----
@@ -960,6 +1022,8 @@ Methods
Tick.copy
Tick.isAnchored
Tick.onOffset
+ Tick.is_anchored
+ Tick.is_on_offset
Day
---
@@ -989,6 +1053,8 @@ Methods
Day.copy
Day.isAnchored
Day.onOffset
+ Day.is_anchored
+ Day.is_on_offset
Hour
----
@@ -1018,6 +1084,8 @@ Methods
Hour.copy
Hour.isAnchored
Hour.onOffset
+ Hour.is_anchored
+ Hour.is_on_offset
Minute
------
@@ -1047,6 +1115,8 @@ Methods
Minute.copy
Minute.isAnchored
Minute.onOffset
+ Minute.is_anchored
+ Minute.is_on_offset
Second
------
@@ -1076,6 +1146,8 @@ Methods
Second.copy
Second.isAnchored
Second.onOffset
+ Second.is_anchored
+ Second.is_on_offset
Milli
-----
@@ -1105,6 +1177,8 @@ Methods
Milli.copy
Milli.isAnchored
Milli.onOffset
+ Milli.is_anchored
+ Milli.is_on_offset
Micro
-----
@@ -1134,6 +1208,8 @@ Methods
Micro.copy
Micro.isAnchored
Micro.onOffset
+ Micro.is_anchored
+ Micro.is_on_offset
Nano
----
@@ -1163,6 +1239,8 @@ Methods
Nano.copy
Nano.isAnchored
Nano.onOffset
+ Nano.is_anchored
+ Nano.is_on_offset
BDay
----
@@ -1195,6 +1273,8 @@ Methods
BDay.copy
BDay.isAnchored
BDay.onOffset
+ BDay.is_anchored
+ BDay.is_on_offset
BDay.rollback
BDay.rollforward
@@ -1228,6 +1308,8 @@ Methods
BMonthEnd.copy
BMonthEnd.isAnchored
BMonthEnd.onOffset
+ BMonthEnd.is_anchored
+ BMonthEnd.is_on_offset
BMonthEnd.rollback
BMonthEnd.rollforward
@@ -1261,6 +1343,8 @@ Methods
BMonthBegin.copy
BMonthBegin.isAnchored
BMonthBegin.onOffset
+ BMonthBegin.is_anchored
+ BMonthBegin.is_on_offset
BMonthBegin.rollback
BMonthBegin.rollforward
@@ -1298,6 +1382,8 @@ Methods
CBMonthEnd.copy
CBMonthEnd.isAnchored
CBMonthEnd.onOffset
+ CBMonthEnd.is_anchored
+ CBMonthEnd.is_on_offset
CBMonthEnd.rollback
CBMonthEnd.rollforward
@@ -1335,6 +1421,8 @@ Methods
CBMonthBegin.copy
CBMonthBegin.isAnchored
CBMonthBegin.onOffset
+ CBMonthBegin.is_anchored
+ CBMonthBegin.is_on_offset
CBMonthBegin.rollback
CBMonthBegin.rollforward
@@ -1369,6 +1457,8 @@ Methods
CDay.copy
CDay.isAnchored
CDay.onOffset
+ CDay.is_anchored
+ CDay.is_on_offset
CDay.rollback
CDay.rollforward
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 6e1ee303135d8..4ad6a7b014532 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -525,6 +525,8 @@ Metadata
:attr:`Series.attrs` is a dictionary for storing global metadata for this Series.
+.. warning:: ``Series.attrs`` is considered experimental and may change without warning.
+
.. autosummary::
:toctree: api/
@@ -578,3 +580,4 @@ Serialization / IO / conversion
Series.to_string
Series.to_clipboard
Series.to_latex
+ Series.to_markdown
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 31bb71064d735..d6f5c0c758b60 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -565,19 +565,15 @@ When working with an ``Index`` object directly, rather than via a ``DataFrame``,
mi2 = mi.rename("new name", level=0)
mi2
-.. warning::
- Prior to pandas 1.0.0, you could also set the names of a ``MultiIndex``
- by updating the name of a level.
+You cannot set the names of a ``MultiIndex`` via a level.
- .. code-block:: none
+.. ipython:: python
+ :okexcept:
- >>> mi.levels[0].name = 'name via level'
- >>> mi.names[0] # only works for older panads
- 'name via level'
+ mi.levels[0].name = "name via level"
- As of pandas 1.0, this will *silently* fail to update the names
- of the MultiIndex. Use :meth:`Index.set_names` instead.
+Use :meth:`Index.set_names` instead.
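+
+For example (a short sketch, reusing ``mi`` from above):
+
+.. ipython:: python
+
+    mi.set_names("name via set_names", level=0)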
Sorting a ``MultiIndex``
------------------------
diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst
index e0f676d3072fc..5276bc6142206 100644
--- a/doc/source/user_guide/boolean.rst
+++ b/doc/source/user_guide/boolean.rst
@@ -14,6 +14,29 @@ Nullable Boolean Data Type
.. versionadded:: 1.0.0
+
+.. _boolean.indexing:
+
+Indexing with NA values
+-----------------------
+
+pandas does not allow indexing with NA values. Attempting to do so
+will raise a ``ValueError``.
+
+.. ipython:: python
+ :okexcept:
+
+ s = pd.Series([1, 2, 3])
+ mask = pd.array([True, False, pd.NA], dtype="boolean")
+ s[mask]
+
+The missing values will need to be explicitly filled with True or False prior
+to using the array as a mask.
+
+.. ipython:: python
+
+ s[mask.fillna(False)]
+
.. _boolean.kleene:
Kleene Logical Operations
diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
index 627a83b7359bb..aeb32db639ffb 100644
--- a/doc/source/user_guide/computation.rst
+++ b/doc/source/user_guide/computation.rst
@@ -321,6 +321,11 @@ We provide a number of common statistical functions:
:meth:`~Rolling.cov`, Unbiased covariance (binary)
:meth:`~Rolling.corr`, Correlation (binary)
+.. _stats.rolling_apply:
+
+Rolling Apply
+~~~~~~~~~~~~~
+
The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
generic rolling computations. The ``func`` argument should be a single function
that produces a single value from an ndarray input. Suppose we wanted to
@@ -334,6 +339,49 @@ compute the mean absolute deviation on a rolling basis:
@savefig rolling_apply_ex.png
s.rolling(window=60).apply(mad, raw=True).plot(style='k')
+.. versionadded:: 1.0
+
+Additionally, :meth:`~Rolling.apply` can leverage `Numba `__
+if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying
+``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``).
+Numba may be applied in up to two routines:
+
+1. If ``func`` is a standard Python function, the engine will `JIT `__
+   the passed function. ``func`` can also be a JITed function, in which case the
+   engine will not JIT the function again.
+
+2. The engine will JIT the for loop where the apply function is applied to each window.
+
+The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
+`numba.jit decorator `__.
+These keyword arguments will be applied to *both* the passed function (if a standard Python function)
+and the apply for loop over each window. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported,
+and their default values are set to ``False``, ``True`` and ``False`` respectively.
+
+.. note::
+
+   In terms of performance, **a function will be slow the first time it is run
+   using the Numba engine**, as Numba incurs some compilation overhead. However,
+   ``rolling`` objects will cache the function, and subsequent calls will be
+   fast. In general, the Numba engine is performant with a larger number of
+   data points (e.g. 1+ million).
+
+.. code-block:: ipython
+
+ In [1]: data = pd.Series(range(1_000_000))
+
+ In [2]: roll = data.rolling(10)
+
+ In [3]: def f(x):
+ ...: return np.sum(x) + 5
+ # Run the first time, compilation time will affect performance
+ In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True) # noqa: E225
+ 1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
+ # Function is cached and performance will improve
+ In [5]: %timeit roll.apply(f, engine='numba', raw=True)
+ 188 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+ In [6]: %timeit roll.apply(f, engine='cython', raw=True)
+ 3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
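+The ``engine_kwargs`` are forwarded to the ``numba.jit`` decorator; a sketch,
+reusing ``roll`` and ``f`` from the example above:
+
+.. code-block:: python
+
+    roll.apply(f, engine='numba', raw=True,
+               engine_kwargs={'nogil': True, 'parallel': False})
+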
.. _stats.rolling_window:
Rolling windows
diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
index 37637bbdb38e6..f581d183b9413 100644
--- a/doc/source/user_guide/cookbook.rst
+++ b/doc/source/user_guide/cookbook.rst
@@ -406,10 +406,10 @@ Levels
******
`Prepending a level to a multiindex
-`__
+`__
`Flatten Hierarchical columns
-`__
+`__
.. _cookbook.missing_data:
@@ -430,13 +430,13 @@ Fill forward a reversed timeseries
df.reindex(df.index[::-1]).ffill()
`cumsum reset at NaN values
-`__
+`__
Replace
*******
`Using replace with backrefs
-`__
+`__
.. _cookbook.grouping:
@@ -446,7 +446,7 @@ Grouping
The :ref:`grouping ` docs.
`Basic grouping with apply
-`__
+`__
Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to all the columns
@@ -462,7 +462,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df.groupby('animal').apply(lambda subf: subf['size'][subf['weight'].idxmax()])
`Using get_group
-`__
+`__
.. ipython:: python
@@ -470,7 +470,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
gb.get_group('cat')
`Apply to different items in a group
-`__
+`__
.. ipython:: python
@@ -486,7 +486,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
expected_df
`Expanding apply
-`__
+`__
.. ipython:: python
@@ -502,7 +502,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
`Replacing some values with mean of the rest of a group
-`__
+`__
.. ipython:: python
@@ -516,7 +516,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
gb.transform(replace)
`Sort groups by aggregated data
-`__
+`__
.. ipython:: python
@@ -533,7 +533,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
sorted_df
`Create multiple aggregated columns
-`__
+`__
.. ipython:: python
@@ -550,7 +550,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
ts
`Create a value counts column and reassign back to the DataFrame
-`__
+`__
.. ipython:: python
@@ -561,7 +561,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df
`Shift groups of the values in a column based on the index
-`__
+`__
.. ipython:: python
@@ -575,7 +575,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df
`Select row with maximum value from each group
-`__
+`__
.. ipython:: python
@@ -587,7 +587,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df_count
`Grouping like Python's itertools.groupby
-`__
+`__
.. ipython:: python
@@ -599,19 +599,19 @@ Expanding data
**************
`Alignment and to-date
-`__
+`__
`Rolling Computation window based on values instead of counts
-`__
+`__
`Rolling Mean by Time Interval
-`__
+`__
Splitting
*********
`Splitting a frame
-`__
+`__
Create a list of dataframes, split using a delineation based on logic included in rows.
@@ -635,7 +635,7 @@ Pivot
The :ref:`Pivot ` docs.
`Partial sums and subtotals
-`__
+`__
.. ipython:: python
@@ -649,7 +649,7 @@ The :ref:`Pivot ` docs.
table.stack('City')
`Frequency table like plyr in R
-`__
+`__
.. ipython:: python
@@ -675,7 +675,7 @@ The :ref:`Pivot ` docs.
'Grade': lambda x: sum(x) / len(x)})
`Plot pandas DataFrame with year over year data
-`__
+`__
To create year and month cross tabulation:
@@ -691,7 +691,7 @@ Apply
*****
`Rolling apply to organize - Turning embedded lists into a MultiIndex frame
-`__
+`__
.. ipython:: python
@@ -707,7 +707,7 @@ Apply
df_orgz
`Rolling apply with a DataFrame returning a Series
-`__
+`__
Rolling Apply to multiple columns where function calculates a Series before a Scalar from the Series is returned
@@ -727,7 +727,7 @@ Rolling Apply to multiple columns where function calculates a Series before a Sc
s
`Rolling apply with a DataFrame returning a Scalar
-`__
+`__
Rolling Apply to multiple columns where function returns a Scalar (Volume Weighted Average Price)
@@ -753,26 +753,26 @@ Timeseries
----------
`Between times
-`__
+`__
`Using indexer between time
-`__
+`__
`Constructing a datetime range that excludes weekends and includes only certain times
-`__
+`__
`Vectorized Lookup
-`__
+`__
`Aggregation and plotting time series
`__
Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
`How to rearrange a Python pandas DataFrame?
-`__
+`__
`Dealing with duplicates when reindexing a timeseries to a specified frequency
-`__
+`__
Calculate the first day of the month for each entry in a DatetimeIndex
@@ -795,7 +795,7 @@ The :ref:`Resample ` docs.
`__
`Valid frequency arguments to Grouper
-`__
+`__
`Grouping using a MultiIndex
`__
@@ -804,15 +804,15 @@ The :ref:`Resample ` docs.
`__
`Resampling with custom periods
-`__
+`__
`Resample intraday frame without adding new days
-`__
+`__
`Resample minute data
-`__
+`__
-`Resample with groupby `__
+`Resample with groupby `__
.. _cookbook.merge:
@@ -822,7 +822,7 @@ Merge
The :ref:`Concat ` docs. The :ref:`Join ` docs.
`Append two dataframes with overlapping index (emulate R rbind)
-`__
+`__
.. ipython:: python
@@ -855,16 +855,16 @@ Depending on df construction, ``ignore_index`` may be needed
suffixes=('_L', '_R'))
`How to set the index and join
-`__
+`__
`KDB like asof join
-`__
+`__
`Join with a criteria based on the values
-`__
+`__
`Using searchsorted to merge based on values inside a range
-`__
+`__
.. _cookbook.plotting:
@@ -874,31 +874,31 @@ Plotting
The :ref:`Plotting ` docs.
`Make Matplotlib look like R
-`__
+`__
`Setting x-axis major and minor labels
-`__
+`__
`Plotting multiple charts in an ipython notebook
-`__
+`__
`Creating a multi-line plot
-`__
+`__
`Plotting a heatmap
-`__
+`__
`Annotate a time-series plot
-`__
+`__
`Annotate a time-series plot #2
-`__
+`__
`Generate Embedded plots in excel files using Pandas, Vincent and xlsxwriter
`__
`Boxplot for each quartile of a stratifying variable
-`__
+`__
.. ipython:: python
@@ -918,7 +918,7 @@ Data In/Out
-----------
`Performance comparison of SQL vs HDF5
-`__
+`__
.. _cookbook.csv:
@@ -930,25 +930,25 @@ The :ref:`CSV ` docs
`read_csv in action `__
`appending to a csv
-`__
+`__
`Reading a csv chunk-by-chunk
-`__
+`__
`Reading only certain rows of a csv chunk-by-chunk
-`__
+`__
`Reading the first few lines of a frame
-`__
+`__
Reading a file that is compressed but not by ``gzip/bz2`` (the native compressed formats which ``read_csv`` understands).
This example shows a ``WinZipped`` file, but is a general application of opening the file within a context manager and
using that handle to read.
`See here
-`__
+`__
`Inferring dtypes from a file
-`__
+`__
`Dealing with bad lines
`__
@@ -960,7 +960,7 @@ using that handle to read.
`__
`Write a multi-row index CSV without writing duplicates
-`__
+`__
.. _cookbook.csv.multiple_files:
@@ -1069,7 +1069,7 @@ SQL
The :ref:`SQL ` docs
`Reading from databases with SQL
-`__
+`__
.. _cookbook.excel:
@@ -1079,7 +1079,7 @@ Excel
The :ref:`Excel ` docs
`Reading from a filelike handle
-`__
+`__
`Modifying formatting in XlsxWriter output
`__
@@ -1090,7 +1090,7 @@ HTML
****
`Reading HTML tables from a server that cannot handle the default request
-header `__
+header `__
.. _cookbook.hdf:
@@ -1100,54 +1100,54 @@ HDFStore
The :ref:`HDFStores ` docs
`Simple queries with a Timestamp Index
-`__
+`__
`Managing heterogeneous data using a linked multiple table hierarchy
`__
`Merging on-disk tables with millions of rows
-`__
+`__
`Avoiding inconsistencies when writing to a store from multiple processes/threads
-`__
+`__
De-duplicating a large store by chunks, essentially a recursive reduction operation. Shows a function for taking in data from
csv file and creating a store by chunks, with date parsing as well.
`See here
-`__
+`__
`Creating a store chunk-by-chunk from a csv file
-`__
+`__
`Appending to a store, while creating a unique index
-`__
+`__
`Large Data work flows
-`__
+`__
`Reading in a sequence of files, then providing a global unique index to a store while appending
-`__
+`__
`Groupby on a HDFStore with low group density
-`__
+`__
`Groupby on a HDFStore with high group density
-`__
+`__
`Hierarchical queries on a HDFStore
-`__
+`__
`Counting with a HDFStore
-`__
+`__
`Troubleshoot HDFStore exceptions
-`__
+`__
`Setting min_itemsize with strings
-`__
+`__
`Using ptrepack to create a completely-sorted-index on a store
-`__
+`__
Storing Attributes to a group node
@@ -1305,7 +1305,7 @@ The :ref:`Timedeltas ` docs.
datetime.timedelta(minutes=5) + s
`Adding and subtracting deltas and dates
-`__
+`__
.. ipython:: python
@@ -1322,7 +1322,7 @@ The :ref:`Timedeltas ` docs.
df.dtypes
`Another example
-`__
+`__
Values can be set to NaT using np.nan, similar to datetime
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index 0229331127441..a8cdf4a61073d 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -668,7 +668,7 @@ Current behavior
KeyError in the future, you can use .reindex() as an alternative.
See the documentation here:
- http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
+ https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Out[4]:
1 2.0
diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
index 77568f3bcb244..a45d7a4fa1547 100644
--- a/doc/source/user_guide/integer_na.rst
+++ b/doc/source/user_guide/integer_na.rst
@@ -15,6 +15,10 @@ Nullable integer data type
IntegerArray is currently experimental. Its API or implementation may
change without warning.
+.. versionchanged:: 1.0.0
+
+ Now uses :attr:`pandas.NA` as the missing value rather
+ than :attr:`numpy.nan`.
In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent
missing data. Because ``NaN`` is a float, this forces an array of integers with
@@ -23,6 +27,9 @@ much. But if your integer column is, say, an identifier, casting to float can
be problematic. Some integers cannot even be represented as floating point
numbers.
+Construction
+------------
+
Pandas can represent integer data with possibly missing values using
:class:`arrays.IntegerArray`. This is an :ref:`extension type `
implemented within pandas.
@@ -39,6 +46,12 @@ NumPy's ``'int64'`` dtype:
pd.array([1, 2, np.nan], dtype="Int64")
+All NA-like values are replaced with :attr:`pandas.NA`.
+
+.. ipython:: python
+
+ pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64")
+
This array can be stored in a :class:`DataFrame` or :class:`Series` like any
NumPy array.
@@ -78,6 +91,9 @@ with the dtype.
In the future, we may provide an option for :class:`Series` to infer a
nullable-integer dtype.
+Operations
+----------
+
Operations involving an integer array will behave similarly to NumPy arrays.
Missing values will be propagated, and the data will be coerced to another
dtype if needed.
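+
+For instance (a short sketch):
+
+.. ipython:: python
+
+    pd.array([1, None], dtype="Int64") + 1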
@@ -123,3 +139,15 @@ Reduction and groupby operations such as 'sum' work as well.
df.sum()
df.groupby('B').A.sum()
+
+Scalar NA Value
+---------------
+
+:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar
+missing value. Slicing a single element that's missing will return
+:attr:`pandas.NA`.
+
+.. ipython:: python
+
+ a = pd.array([1, None], dtype="Int64")
+ a[1]
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index c32b009948fda..e776da016d5d7 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1153,7 +1153,7 @@ To completely override the default values that are recognized as missing, specif
.. _io.navaluesconst:
The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A',
-'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
+'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
Let us consider some examples:
@@ -1519,7 +1519,7 @@ rows will skip the intervening rows.
.. ipython:: python
- from pandas.util.testing import makeCustomDataframe as mkdf
+ from pandas._testing import makeCustomDataframe as mkdf
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
df.to_csv('mi.csv')
print(open('mi.csv').read())
@@ -2066,6 +2066,8 @@ The Numpy parameter
+++++++++++++++++++
.. note::
+   This parameter has been deprecated as of version 1.0.0 and will raise a ``FutureWarning``.
+
This supports numeric data only. Index and columns labels may be non-numeric, e.g. strings, dates etc.
If ``numpy=True`` is passed to ``read_json`` an attempt will be made to sniff
@@ -2088,6 +2090,7 @@ data:
%timeit pd.read_json(jsonfloats)
.. ipython:: python
+ :okwarning:
%timeit pd.read_json(jsonfloats, numpy=True)
@@ -2102,6 +2105,7 @@ The speedup is less noticeable for smaller datasets:
%timeit pd.read_json(jsonfloats)
.. ipython:: python
+ :okwarning:
%timeit pd.read_json(jsonfloats, numpy=True)
@@ -2629,7 +2633,7 @@ that contain URLs.
url_df = pd.DataFrame({
'name': ['Python', 'Pandas'],
- 'url': ['https://www.python.org/', 'http://pandas.pydata.org']})
+ 'url': ['https://www.python.org/', 'https://pandas.pydata.org']})
print(url_df.to_html(render_links=True))
.. ipython:: python
@@ -3877,6 +3881,8 @@ specified in the format: ``<float>(<unit>)``, where float may be signed (and fra
store.append('dftd', dftd, data_columns=True)
store.select('dftd', "C<'-3.5D'")
+.. _io.query_multi:
+
Query MultiIndex
++++++++++++++++
@@ -4214,46 +4220,49 @@ Compression
all kinds of stores, not just tables. Two parameters are used to
control compression: ``complevel`` and ``complib``.
-``complevel`` specifies if and how hard data is to be compressed.
- ``complevel=0`` and ``complevel=None`` disables
- compression and ``0<complevel<10`` enables compression. ``complib``
- specifies which compression library to use. The list of supported
- compression libraries:
-
- - `zlib `_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow.
- - `lzo `_: Fast compression and decompression.
- - `bzip2 `_: Good compression rates.
- - `blosc `_: Fast compression and decompression.
-
- Support for alternative blosc compressors:
-
- - `blosc:blosclz `_ This is the
- default compressor for ``blosc``
- - `blosc:lz4
- `_:
- A compact, very popular and fast compressor.
- - `blosc:lz4hc
- `_:
- A tweaked version of LZ4, produces better
- compression ratios at the expense of speed.
- - `blosc:snappy `_:
- A popular compressor used in many places.
- - `blosc:zlib `_: A classic;
- somewhat slower than the previous ones, but
- achieving better compression ratios.
- - `blosc:zstd `_: An
- extremely well balanced codec; it provides the best
- compression ratios among the others above, and at
- reasonably fast speed.
-
- If ``complib`` is defined as something other than the
- listed libraries a ``ValueError`` exception is issued.
+* ``complevel`` specifies if and how hard data is to be compressed.
+  ``complevel=0`` and ``complevel=None`` disables compression and
+  ``0 < complevel < 10`` enables compression.
+
+* ``complib`` specifies which compression library to use. If nothing is
+  specified the default library ``zlib`` is used. The list of supported
+  compression libraries:
+
+  - `zlib `_: The default compression library.
+ A classic in terms of compression, achieves good compression
+ rates but is somewhat slow.
+ - `lzo `_: Fast
+ compression and decompression.
+ - `bzip2 `_: Good compression rates.
+ - `blosc `_: Fast compression and
+ decompression.
+
+ Support for alternative blosc compressors:
+
+ - `blosc:blosclz `_ This is the
+ default compressor for ``blosc``
+ - `blosc:lz4
+ `_:
+ A compact, very popular and fast compressor.
+ - `blosc:lz4hc
+ `_:
+ A tweaked version of LZ4, produces better
+ compression ratios at the expense of speed.
+ - `blosc:snappy `_:
+ A popular compressor used in many places.
+ - `blosc:zlib `_: A classic;
+ somewhat slower than the previous ones, but
+ achieving better compression ratios.
+ - `blosc:zstd `_: An
+ extremely well balanced codec; it provides the best
+ compression ratios among the others above, and at
+ reasonably fast speed.
+
+ If ``complib`` is defined as something other than the listed libraries a
+ ``ValueError`` exception is issued.
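+
+For example, compression can be enabled for a whole store (a sketch; this
+creates ``store.h5`` in the current directory):
+
+.. code-block:: python
+
+    store = pd.HDFStore("store.h5", complevel=9, complib="blosc:zstd")
+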
.. note::
@@ -4646,10 +4655,10 @@ Several caveats.
* Index level names, if specified, must be strings.
* In the ``pyarrow`` engine, categorical dtypes for non-string types can be serialized to parquet, but will de-serialize as their primitive dtype.
* The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag.
-* Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message
- on an attempt at serialization.
+* Unsupported types include ``Interval`` and actual Python object types. These will raise a helpful error message
+ on an attempt at serialization. ``Period`` type is supported with pyarrow >= 0.16.0.
* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data
- type (requiring pyarrow >= 1.0.0, and requiring the extension type to implement the needed protocols,
+ type (requiring pyarrow >= 0.16.0, and requiring the extension type to implement the needed protocols,
see the :ref:`extension types documentation `).
You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``.
diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
index 1bfe196cb2f89..0f55980b3d015 100644
--- a/doc/source/user_guide/missing_data.rst
+++ b/doc/source/user_guide/missing_data.rst
@@ -791,7 +791,7 @@ the nullable :doc:`integer `, boolean and
:ref:`dedicated string ` data types as the missing value indicator.
The goal of ``pd.NA`` is to provide a "missing" indicator that can be used
-consistently accross data types (instead of ``np.nan``, ``None`` or ``pd.NaT``
+consistently across data types (instead of ``np.nan``, ``None`` or ``pd.NaT``
depending on the data type).
For example, when having missing values in a Series with the nullable integer
@@ -825,14 +825,10 @@ For example, ``pd.NA`` propagates in arithmetic operations, similarly to
There are a few special cases when the result is known, even when one of the
operands is ``NA``.
+.. ipython:: python
-================ ======
-Operation Result
-================ ======
-``pd.NA ** 0`` 0
-``1 ** pd.NA`` 1
-``-1 ** pd.NA`` -1
-================ ======
+ pd.NA ** 0
+ 1 ** pd.NA
In equality and comparison operations, ``pd.NA`` also propagates. This deviates
from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always
@@ -920,3 +916,29 @@ filling missing values beforehand.
A similar situation occurs when using Series or DataFrame objects in ``if``
statements, see :ref:`gotchas.truth`.
+
+NumPy ufuncs
+------------
+
+:attr:`pandas.NA` implements NumPy's ``__array_ufunc__`` protocol. Most ufuncs
+work with ``NA``, and generally return ``NA``:
+
+.. ipython:: python
+
+ np.log(pd.NA)
+ np.add(pd.NA, 1)
+
+.. warning::
+
+   Currently, ufuncs involving an ndarray and ``NA`` will return an
+   object-dtype array filled with NA values.
+
+ .. ipython:: python
+
+ a = np.array([1, 2, 3])
+ np.greater(a, pd.NA)
+
+ The return type here may change to return a different array type
+ in the future.
+
+See :ref:`dsintro.numpy_interop` for more on ufuncs.
diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
index 8583a9312b690..b28354cd8b5f2 100644
--- a/doc/source/user_guide/reshaping.rst
+++ b/doc/source/user_guide/reshaping.rst
@@ -14,7 +14,7 @@ Reshaping by pivoting DataFrame objects
.. ipython:: python
:suppress:
- import pandas.util.testing as tm
+ import pandas._testing as tm
tm.N = 3
def unpivot(frame):
@@ -38,7 +38,7 @@ For the curious here is how the above ``DataFrame`` was created:
.. code-block:: python
- import pandas.util.testing as tm
+ import pandas._testing as tm
tm.N = 3
diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
index 0611c6334937f..43bb4966ec5bf 100644
--- a/doc/source/user_guide/scale.rst
+++ b/doc/source/user_guide/scale.rst
@@ -26,7 +26,7 @@ Assuming you want or need the expressiveness and power of pandas, let's carry on
.. ipython:: python
:suppress:
- from pandas.util.testing import _make_timeseries
+ from pandas._testing import _make_timeseries
# Make a random in-memory dataset
ts = _make_timeseries(freq="30S", seed=0)
diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
index c258a8840b714..8588fac4a18d0 100644
--- a/doc/source/user_guide/sparse.rst
+++ b/doc/source/user_guide/sparse.rst
@@ -15,7 +15,7 @@ can be chosen, including 0) is omitted. The compressed values are not actually s
arr = np.random.randn(10)
arr[2:-2] = np.nan
- ts = pd.Series(pd.SparseArray(arr))
+ ts = pd.Series(pd.arrays.SparseArray(arr))
ts
Notice the dtype, ``Sparse[float64, nan]``. The ``nan`` means that elements in the
@@ -51,7 +51,7 @@ identical to their dense counterparts.
SparseArray
-----------
-:class:`SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray`
+:class:`arrays.SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray`
for storing an array of sparse values (see :ref:`basics.dtypes` for more
on extension arrays). It is a 1-dimensional ndarray-like object storing
only values distinct from the ``fill_value``:
@@ -61,7 +61,7 @@ only values distinct from the ``fill_value``:
arr = np.random.randn(10)
arr[2:5] = np.nan
arr[7:8] = np.nan
- sparr = pd.SparseArray(arr)
+ sparr = pd.arrays.SparseArray(arr)
sparr
A sparse array can be converted to a regular (dense) ndarray with :meth:`numpy.asarray`
@@ -144,7 +144,7 @@ to ``SparseArray`` and get a ``SparseArray`` as a result.
.. ipython:: python
- arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan])
+ arr = pd.arrays.SparseArray([1., np.nan, np.nan, -2., np.nan])
np.abs(arr)
@@ -153,7 +153,7 @@ the correct dense result.
.. ipython:: python
- arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
+ arr = pd.arrays.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
np.abs(arr)
np.abs(arr).to_dense()
@@ -194,7 +194,7 @@ From an array-like, use the regular :class:`Series` or
.. ipython:: python
# New way
- pd.DataFrame({"A": pd.SparseArray([0, 1])})
+ pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])})
From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`,
@@ -256,10 +256,10 @@ Instead, you'll need to ensure that the values being assigned are sparse
.. ipython:: python
- df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
+ df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])})
df['B'] = [0, 0] # remains dense
df['B'].dtype
- df['B'] = pd.SparseArray([0, 0])
+ df['B'] = pd.arrays.SparseArray([0, 0])
df['B'].dtype
The ``SparseDataFrame.default_kind`` and ``SparseDataFrame.default_fill_value`` attributes
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
index 633827eb79f46..02550eab86913 100644
--- a/doc/source/user_guide/style.ipynb
+++ b/doc/source/user_guide/style.ipynb
@@ -1063,7 +1063,7 @@
"- Provide an API that is pleasing to use interactively and is \"good enough\" for many tasks\n",
"- Provide the foundations for dedicated libraries to build on\n",
"\n",
- "If you build a great library on top of this, let us know and we'll [link](http://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n",
+ "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n",
"\n",
"### Subclassing\n",
"\n",
diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index 53c7a7437d55f..88c86ac212f11 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -87,8 +87,9 @@ l. For ``StringDtype``, :ref:`string accessor methods`
.. ipython:: python
- s.astype(object).str.count("a")
- s.astype(object).dropna().str.count("a")
+ s2 = pd.Series(["a", None, "b"], dtype="object")
+ s2.str.count("a")
+ s2.dropna().str.count("a")
When NA values are present, the output dtype is float64. Similarly for
methods returning boolean values.
@@ -101,10 +102,10 @@ l. For ``StringDtype``, :ref:`string accessor methods`
2. Some string methods, like :meth:`Series.str.decode` are not available
on ``StringArray`` because ``StringArray`` only holds strings, not
bytes.
-3. In comparision operations, :class:`arrays.StringArray` and ``Series`` backed
+3. In comparison operations, :class:`arrays.StringArray` and ``Series`` backed
by a ``StringArray`` will return an object with :class:`BooleanDtype`,
rather than a ``bool`` dtype object. Missing values in a ``StringArray``
- will propagate in comparision operations, rather than always comparing
+ will propagate in comparison operations, rather than always comparing
unequal like :attr:`numpy.nan`.
Everything else that follows in the rest of this document applies equally to
diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index 05c7f72882088..bc463d0ab22d8 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details,
see the commit logs at http://github.com/pandas-dev/pandas. For install and
upgrade instructions, see :ref:`install`.
+Version 1.1
+-----------
+
+.. toctree::
+ :maxdepth: 2
+
+ v1.1.0
+
Version 1.0
-----------
diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst
index 86ff338536f80..823e177f3e05e 100644
--- a/doc/source/whatsnew/v0.12.0.rst
+++ b/doc/source/whatsnew/v0.12.0.rst
@@ -236,7 +236,7 @@ I/O enhancements
.. ipython:: python
- from pandas.util.testing import makeCustomDataframe as mkdf
+ from pandas._testing import makeCustomDataframe as mkdf
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
df.to_csv('mi.csv')
print(open('mi.csv').read())
diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst
index 6242c40d44bf8..4f9ab761334e7 100644
--- a/doc/source/whatsnew/v0.13.1.rst
+++ b/doc/source/whatsnew/v0.13.1.rst
@@ -224,7 +224,7 @@ Enhancements
.. code-block:: ipython
- In [28]: import pandas.util.testing as tm
+ In [28]: import pandas._testing as tm
In [29]: panel = tm.makePanel(5)
diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst
index b328e549e8899..95e354e425143 100644
--- a/doc/source/whatsnew/v0.15.0.rst
+++ b/doc/source/whatsnew/v0.15.0.rst
@@ -852,7 +852,7 @@ Other notable API changes:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
- See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
+ See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
- ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type
as the ``left`` argument (:issue:`7737`).
diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst
index b58eabaed6127..292351c709940 100644
--- a/doc/source/whatsnew/v0.15.2.rst
+++ b/doc/source/whatsnew/v0.15.2.rst
@@ -172,7 +172,7 @@ Other enhancements:
4 True True True True
- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`).
-- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__.
+- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__.
- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`).
- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`).
- Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`).
diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst
index fc638e35ed88b..855d0b8695bb1 100644
--- a/doc/source/whatsnew/v0.16.0.rst
+++ b/doc/source/whatsnew/v0.16.0.rst
@@ -528,7 +528,7 @@ Deprecations
`seaborn `_ for similar
but more refined functionality (:issue:`3445`).
The documentation includes some examples how to convert your existing code
- from ``rplot`` to seaborn `here `__.
+ from ``rplot`` to seaborn `here `__.
- The ``pandas.sandbox.qtpandas`` interface is deprecated and will be removed in a future version.
We refer users to the external package `pandas-qt `_. (:issue:`9615`)
diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst
index a7174c6325f86..d3f96d4185d65 100644
--- a/doc/source/whatsnew/v0.18.0.rst
+++ b/doc/source/whatsnew/v0.18.0.rst
@@ -1279,7 +1279,7 @@ Bug Fixes
- Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`).
- Bug in ``Series`` constructor with read-only data (:issue:`11502`)
-- Removed ``pandas.util.testing.choice()``. Should use ``np.random.choice()``, instead. (:issue:`12386`)
+- Removed ``pandas._testing.choice()``. Should use ``np.random.choice()``, instead. (:issue:`12386`)
- Bug in ``.loc`` setitem indexer preventing the use of a TZ-aware DatetimeIndex (:issue:`12050`)
- Bug in ``.style`` indexes and MultiIndexes not appearing (:issue:`11655`)
- Bug in ``to_msgpack`` and ``from_msgpack`` which did not correctly serialize or deserialize ``NaT`` (:issue:`12307`).
diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
index 6f6446c3f74e1..6eb509a258430 100644
--- a/doc/source/whatsnew/v0.19.0.rst
+++ b/doc/source/whatsnew/v0.19.0.rst
@@ -1225,6 +1225,7 @@ Previously, sparse data were ``float64`` dtype by default, even if all inputs we
As of v0.19.0, sparse data keeps the input dtype, and uses more appropriate ``fill_value`` defaults (``0`` for ``int64`` dtype, ``False`` for ``bool`` dtype).
.. ipython:: python
+ :okwarning:
pd.SparseArray([1, 2, 0, 0], dtype=np.int64)
pd.SparseArray([True, False, False, False])
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index e7dc6150ffcb1..ceb1c7f27231b 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -1360,7 +1360,7 @@ provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`
.. code-block:: ipython
- In [133]: import pandas.util.testing as tm
+ In [133]: import pandas._testing as tm
In [134]: p = tm.makePanel()
diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst
index f33943e423b25..71969c4de6b02 100644
--- a/doc/source/whatsnew/v0.21.0.rst
+++ b/doc/source/whatsnew/v0.21.0.rst
@@ -470,7 +470,7 @@ Current behavior
KeyError in the future, you can use .reindex() as an alternative.
See the documentation here:
- http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
+ https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Out[4]:
1 2.0
@@ -927,7 +927,7 @@ Other API changes
- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
- Compression defaults in HDF stores now follow pytables standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
-- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
+- Removed the ``@slow`` decorator from ``pandas._testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
- Moved definition of ``MergeError`` to the ``pandas.errors`` module.
- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`)
- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`)
diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst
index f4c283ea742f7..b9e1b5060d1da 100644
--- a/doc/source/whatsnew/v0.23.0.rst
+++ b/doc/source/whatsnew/v0.23.0.rst
@@ -648,7 +648,7 @@ provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`
.. code-block:: ipython
- In [75]: import pandas.util.testing as tm
+ In [75]: import pandas._testing as tm
In [76]: p = tm.makePanel()
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index b6b91983b8267..b18d022349001 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -354,6 +354,7 @@ When passed DataFrames whose values are sparse, :func:`concat` will now return a
:class:`Series` or :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`25702`).
.. ipython:: python
+ :okwarning:
df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
@@ -910,6 +911,7 @@ by a ``Series`` or ``DataFrame`` with sparse values.
**New way**
.. ipython:: python
+ :okwarning:
df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
df.dtypes
diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst
index f73a3f956f42e..f7f54198a0f82 100644
--- a/doc/source/whatsnew/v0.25.3.rst
+++ b/doc/source/whatsnew/v0.25.3.rst
@@ -19,4 +19,4 @@ Groupby/resample/rolling
Contributors
~~~~~~~~~~~~
-.. contributors:: v0.25.2..HEAD
+.. contributors:: v0.25.2..v0.25.3
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
old mode 100644
new mode 100755
index faca744a8f92c..3bd86bb02155f
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1,40 +1,29 @@
-.. _whatsnew_1000:
+.. _whatsnew_100:
What's new in 1.0.0 (??)
------------------------
-.. warning::
-
- Starting with the 1.x series of releases, pandas only supports Python 3.6.1 and higher.
+These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog
+including other versions of pandas.
-New Deprecation Policy
-~~~~~~~~~~~~~~~~~~~~~~
+.. note::
-Starting with Pandas 1.0.0, pandas will adopt a version of `SemVer`_.
+ The pandas 1.0 release removed a lot of functionality that was deprecated
+ in previous releases (see :ref:`below `
+ for an overview). It is recommended to first upgrade to pandas 0.25 and to
+ ensure your code is working without warnings, before upgrading to pandas
+ 1.0.
-Historically, pandas has used a "rolling" deprecation policy, with occasional
-outright breaking API changes. Where possible, we would deprecate the behavior
-we'd like to change, giving an option to adopt the new behavior (via a keyword
-or an alternative method), and issuing a warning for users of the old behavior.
-Sometimes, a deprecation was not possible, and we would make an outright API
-breaking change.
-We'll continue to *introduce* deprecations in major and minor releases (e.g.
-1.0.0, 1.1.0, ...). Those deprecations will be *enforced* in the next major
-release.
+New Deprecation Policy
+~~~~~~~~~~~~~~~~~~~~~~
-Note that *behavior changes* and *API breaking changes* are not identical. API
-breaking changes will only be released in major versions. If we consider a
-behavior to be a bug, and fixing that bug induces a behavior change, we'll
-release that change in a minor release. This is a sometimes difficult judgment
-call that we'll do our best on.
+Starting with Pandas 1.0.0, pandas will adopt a variant of `SemVer`_ to
+version releases. Briefly,
-This doesn't mean that pandas' pace of development will slow down. In the `2019
-Pandas User Survey`_, about 95% of the respondents said they considered pandas
-"stable enough". This indicates there's an appetite for new features, even if it
-comes at the cost of break API. The difference is that now API breaking changes
-will be accompanied with a bump in the major version number (e.g. pandas 1.5.1
--> 2.0.0).
+* Deprecations will be introduced in minor releases (e.g. 1.1.0, 1.2.0, 2.1.0, ...)
+* Deprecations will be enforced in major releases (e.g. 1.0.0, 2.0.0, 3.0.0, ...)
+* API-breaking changes will be made only in major releases (except for experimental features)
See :ref:`policies.version` for more.
@@ -43,20 +32,63 @@ See :ref:`policies.version` for more.
{{ header }}
-These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog
-including other versions of pandas.
-
+.. ---------------------------------------------------------------------------
Enhancements
~~~~~~~~~~~~
+.. _whatsnew_100.NA:
+
+Experimental ``NA`` scalar to denote missing values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A new ``pd.NA`` value (singleton) is introduced to represent scalar missing
+values. Up to now, pandas used several values to represent missing data: ``np.nan`` for float data, ``np.nan`` or
+``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The
+goal of ``pd.NA`` is to provide a "missing" indicator that can be used
+consistently across data types. ``pd.NA`` is currently used by the nullable integer and boolean
+data types and the new string data type (:issue:`28095`).
+
+.. warning::
+
+ Experimental: the behaviour of ``pd.NA`` can still change without warning.
+
+For example, creating a Series using the nullable integer dtype:
+
+.. ipython:: python
+
+ s = pd.Series([1, 2, None], dtype="Int64")
+ s
+ s[2]
+
+Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations.
+In addition to arithmetic operations, ``pd.NA`` also propagates as "missing"
+or "unknown" in comparison operations:
+
+.. ipython:: python
+
+ np.nan > 1
+ pd.NA > 1
+
+For logical operations, ``pd.NA`` follows the rules of the
+`three-valued logic <https://en.wikipedia.org/wiki/Three-valued_logic>`__ (or
+*Kleene logic*). For example:
+
+.. ipython:: python
+
+ pd.NA | True
+
+For more, see :ref:`NA section ` in the user guide on missing
+data.
+
+
.. _whatsnew_100.string:
Dedicated string data type
^^^^^^^^^^^^^^^^^^^^^^^^^^
We've added :class:`StringDtype`, an extension type dedicated to string data.
-Previously, strings were typically stored in object-dtype NumPy arrays.
+Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`)
.. warning::
@@ -102,59 +134,15 @@ String accessor methods returning integers will return a value with :class:`Int6
We recommend explicitly using the ``string`` data type when working with strings.
See :ref:`text.types` for more.
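+
+A minimal illustration:
+
+.. code-block:: python
+
+   >>> pd.Series(["abc", None, "def"], dtype="string")
+   0     abc
+   1    <NA>
+   2     def
+   dtype: string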
-.. _whatsnew_100.NA:
-
-Experimental ``NA`` scalar to denote missing values
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-A new ``pd.NA`` value (singleton) is introduced to represent scalar missing
-values. Up to now, ``np.nan`` is used for this for float data, ``np.nan`` or
-``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The
-goal of ``pd.NA`` is provide a "missing" indicator that can be used
-consistently accross data types. For now, the nullable integer and boolean
-data types and the new string data type make use of ``pd.NA`` (:issue:`28095`).
-
-.. warning::
-
- Experimental: the behaviour of ``pd.NA`` can still change without warning.
-
-For example, creating a Series using the nullable integer dtype:
-
-.. ipython:: python
-
- s = pd.Series([1, 2, None], dtype="Int64")
- s
- s[2]
-
-Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations.
-In addition to arithmetic operations, ``pd.NA`` also propagates as "missing"
-or "unknown" in comparison operations:
-
-.. ipython:: python
-
- np.nan > 1
- pd.NA > 1
-
-For logical operations, ``pd.NA`` follows the rules of the
-`three-valued logic `__ (or
-*Kleene logic*). For example:
-
-.. ipython:: python
-
- pd.NA | True
-
-For more, see :ref:`NA section ` in the user guide on missing
-data.
-
.. _whatsnew_100.boolean:
Boolean data type with missing values support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension
-type dedicated to boolean data that can hold missing values. With the default
-``'bool`` data type based on a numpy bool array, the column can only hold
-True or False values and not missing values. This new :class:`BooleanDtype`
+type dedicated to boolean data that can hold missing values. With the default
+``bool`` data type based on a bool-dtype NumPy array, the column can only hold
+``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray`
can store missing values as well by keeping track of this in a separate mask.
(:issue:`29555`, :issue:`30095`)
@@ -169,7 +157,18 @@ You can use the alias ``"boolean"`` as well.
s = pd.Series([True, False, None], dtype="boolean")
s
-.. _whatsnew_1000.custom_window:
+.. _whatsnew_100.numba_rolling_apply:
+
+Using Numba in ``rolling.apply`` and ``expanding.apply``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We've added an ``engine`` keyword to :meth:`~core.window.rolling.Rolling.apply` and :meth:`~core.window.expanding.Expanding.apply`
+that allows the user to execute the routine using `Numba `__ instead of Cython.
+Using the Numba engine can yield significant performance gains if the applied function can operate on NumPy arrays and
+the data set is large (1 million rows or greater). For more details, see
+:ref:`rolling apply documentation ` (:issue:`28987`, :issue:`30936`)
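+
+For example, a minimal sketch (assuming the optional ``numba`` dependency is
+installed; ``raw=True`` is required so the function receives NumPy arrays):
+
+.. code-block:: python
+
+   >>> s = pd.Series(range(5), dtype="float64")
+   >>> def mean_of_window(values):
+   ...     return values.mean()
+   >>> s.rolling(3).apply(mean_of_window, raw=True, engine="numba")
+   0    NaN
+   1    NaN
+   2    1.0
+   3    2.0
+   4    3.0
+   dtype: float64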
+
+.. _whatsnew_100.custom_window:
Defining custom windows for rolling operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -180,12 +179,25 @@ method on a :func:`pandas.api.indexers.BaseIndexer` subclass that will generate
indices used for each window during the rolling aggregation. For more details and example usage, see
the :ref:`custom window rolling documentation `
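+
+A minimal sketch of such a subclass (an expanding window, for illustration;
+``ExpandingIndexer`` is a hypothetical name):
+
+.. code-block:: python
+
+   >>> import numpy as np
+   >>> from pandas.api.indexers import BaseIndexer
+
+   >>> class ExpandingIndexer(BaseIndexer):
+   ...     def get_window_bounds(self, num_values, min_periods, center, closed):
+   ...         # each window starts at the first row and ends at the current row
+   ...         start = np.zeros(num_values, dtype=np.int64)
+   ...         end = np.arange(1, num_values + 1, dtype=np.int64)
+   ...         return start, end
+
+   >>> s = pd.Series(range(4))
+   >>> s.rolling(ExpandingIndexer(), min_periods=1).sum()
+   0    0.0
+   1    1.0
+   2    3.0
+   3    6.0
+   dtype: float64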
-.. _whatsnew_1000.enhancements.other:
+.. _whatsnew_100.to_markdown:
+
+Converting to Markdown
+^^^^^^^^^^^^^^^^^^^^^^
+
+We've added :meth:`~DataFrame.to_markdown` for creating a markdown table (:issue:`11052`)
+
+.. ipython:: python
+
+ df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=['a', 'a', 'b'])
+ print(df.to_markdown())
+
+.. _whatsnew_100.enhancements.other:
Other enhancements
^^^^^^^^^^^^^^^^^^
- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`)
+- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`)
- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`)
- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
- The :ref:`integer dtype ` with support for missing values and the
@@ -201,12 +213,21 @@ Other enhancements
- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)
- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`)
-- Roundtripping DataFrames with nullable integer or string data types to parquet
+- Roundtripping DataFrames with nullable integer, string and period data types to parquet
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
- now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
+ now preserves those data types with pyarrow >= 0.16.0 (:issue:`20612`, :issue:`28371`).
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
-- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`)
+- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
+- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
- DataFrame constructor now preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
+- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained an ``ignore_index`` keyword to reset the index after sorting; see the example after this list (:issue:`30114`)
+- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained an ``ignore_index`` keyword to reset the index (:issue:`30114`)
+- :meth:`DataFrame.drop_duplicates` has gained an ``ignore_index`` keyword to reset the index (:issue:`30114`)
+- Added new writer for exporting Stata dta files in versions 118 and 119, ``StataWriterUTF8``. These file formats support exporting strings containing Unicode characters. Format 119 supports data sets with more than 32,767 variables (:issue:`23573`, :issue:`30959`)
+- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
+- Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`)
+- :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`)
+- :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`)
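+
+For example, a brief illustration of the new ``ignore_index`` keyword:
+
+.. code-block:: python
+
+   >>> df = pd.DataFrame({"A": [3, 1, 2]}, index=[10, 20, 30])
+   >>> df.sort_values("A", ignore_index=True)
+      A
+   0  1
+   1  2
+   2  3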
Build Changes
@@ -217,12 +238,14 @@ cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :i
a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from
source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``.
-.. _whatsnew_1000.api_breaking:
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_100.api_breaking:
Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. _whatsnew_1000.api_breaking.MultiIndex._names:
+.. _whatsnew_100.api_breaking.MultiIndex._names:
Avoid using names from ``MultiIndex.levels``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -240,10 +263,10 @@ For backwards compatibility, you can still *access* the names via the levels.
mi.levels[0].name
However, it is no longer possible to *update* the names of the ``MultiIndex``
-via the name of the level. The following will **silently** fail to update the
-name of the ``MultiIndex``
+via the level.
.. ipython:: python
+ :okexcept:
mi.levels[0].name = "new name"
mi.names
@@ -270,52 +293,107 @@ New repr for :class:`~pandas.arrays.IntervalArray`
closed='right',
dtype='interval[int64]')
-
*pandas 1.0.0*
.. ipython:: python
pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)])
+``DataFrame.rename`` now only accepts one positional argument
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-All :class:`SeriesGroupBy` aggregation methods now respect the ``observed`` keyword
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)`` (:issue:`17605`)
+- :meth:`DataFrame.rename` would previously accept positional arguments that would lead
+ to ambiguous or undefined behavior. From pandas 1.0, only the very first argument, which
+ maps labels to their new names along the default axis, is allowed to be passed by position
+ (:issue:`29136`).
-- :meth:`SeriesGroupBy.count`
-- :meth:`SeriesGroupBy.size`
-- :meth:`SeriesGroupBy.nunique`
-- :meth:`SeriesGroupBy.nth`
+*pandas 0.25.x*
+
+.. code-block:: ipython
+
+ In [1]: df = pd.DataFrame([[1]])
+ In [2]: df.rename({0: 1}, {0: 2})
+ FutureWarning: ...Use named arguments to resolve ambiguity...
+ Out[2]:
+ 2
+ 1 1
+
+*pandas 1.0.0*
.. ipython:: python
+ :okexcept:
- df = pd.DataFrame({
- "cat_1": pd.Categorical(list("AABB"), categories=list("ABC")),
- "cat_2": pd.Categorical(list("AB") * 2, categories=list("ABC")),
- "value": [0.1] * 4,
- })
- df
+ df.rename({0: 1}, {0: 2})
+Note that errors will now be raised when conflicting or potentially ambiguous arguments are provided.
*pandas 0.25.x*
.. code-block:: ipython
- In [2]: df.groupby(["cat_1", "cat_2"], observed=False)["value"].count()
+ In [1]: df.rename({0: 1}, index={0: 2})
+ Out[1]:
+ 0
+ 1 1
+
+ In [2]: df.rename(mapper={0: 1}, index={0: 2})
Out[2]:
- cat_1 cat_2
- A A 1
- B 1
- B A 1
- B 1
- Name: value, dtype: int64
+ 0
+ 2 1
+
+*pandas 1.0.0*
+
+.. ipython:: python
+ :okexcept:
+ df.rename({0: 1}, index={0: 2})
+ df.rename(mapper={0: 1}, index={0: 2})
+
+You can still change the axis along which the first positional argument is applied by
+supplying the ``axis`` keyword argument.
+
+.. ipython:: python
+
+ df.rename({0: 1})
+ df.rename({0: 1}, axis=1)
+
+If you would like to update both the index and column labels, be sure to use the respective
+keywords.
+
+.. ipython:: python
+
+ df.rename(index={0: 1}, columns={0: 2})
+
+Extended verbose info output for :class:`~pandas.DataFrame`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> df = pd.DataFrame({"int_col": [1, 2, 3],
+ ... "text_col": ["a", "b", "c"],
+ ... "float_col": [0.0, 0.1, 0.2]})
+ >>> df.info(verbose=True)
+ <class 'pandas.core.frame.DataFrame'>
+ RangeIndex: 3 entries, 0 to 2
+ Data columns (total 3 columns):
+ int_col 3 non-null int64
+ text_col 3 non-null object
+ float_col 3 non-null float64
+ dtypes: float64(1), int64(1), object(1)
+ memory usage: 152.0+ bytes
*pandas 1.0.0*
.. ipython:: python
- df.groupby(["cat_1", "cat_2"], observed=False)["value"].count()
+ df = pd.DataFrame({"int_col": [1, 2, 3],
+ "text_col": ["a", "b", "c"],
+ "float_col": [0.0, 0.1, 0.2]})
+ df.info(verbose=True)
:meth:`pandas.array` inference changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -350,6 +428,130 @@ The following methods now also correctly output values for unobserved categories
As a reminder, you can specify the ``dtype`` to disable all inference.
+:class:`arrays.IntegerArray` now uses :attr:`pandas.NA`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than
+:attr:`numpy.nan` as its missing value marker (:issue:`29964`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> a = pd.array([1, 2, None], dtype="Int64")
+ >>> a
+ <IntegerArray>
+ [1, 2, NaN]
+ Length: 3, dtype: Int64
+
+ >>> a[2]
+ nan
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+ a = pd.array([1, 2, None], dtype="Int64")
+ a
+ a[2]
+
+This has a few API-breaking consequences.
+
+**Converting to a NumPy ndarray**
+
+When converting to a NumPy array, missing values will be ``pd.NA``, which cannot
+be converted to a float. So calling ``np.asarray(integer_array, dtype="float")``
+will now raise.
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> np.asarray(a, dtype="float")
+ array([ 1., 2., nan])
+
+*pandas 1.0.0*
+
+.. ipython:: python
+ :okexcept:
+
+ np.asarray(a, dtype="float")
+
+Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead.
+
+.. ipython:: python
+
+ a.to_numpy(dtype="float", na_value=np.nan)
+
+**Reductions can return ``pd.NA``**
+
+When performing a reduction such as a sum with ``skipna=False``, the result
+will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values
+(:issue:`30958`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> pd.Series(a).sum(skipna=False)
+ nan
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+ pd.Series(a).sum(skipna=False)
+
+**value_counts returns a nullable integer dtype**
+
+:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable
+integer dtype for the values.
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype
+ dtype('int64')
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+ pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype
+
+See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA`
+and :attr:`numpy.nan`.
+
+:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Comparison operations on a :class:`arrays.IntegerArray` now return a
+:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+ >>> a = pd.array([1, 2, None], dtype="Int64")
+ >>> a
+ <IntegerArray>
+ [1, 2, NaN]
+ Length: 3, dtype: Int64
+
+ >>> a > 1
+ array([False, True, False])
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+ a = pd.array([1, 2, None], dtype="Int64")
+ a > 1
+
+Note that missing values now propagate, rather than always comparing unequal
+like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more.
+
By default :meth:`Categorical.min` now returns the minimum instead of np.nan
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
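+
+For example (a small sketch; missing values are now skipped by default):
+
+.. code-block:: python
+
+   >>> cat = pd.Categorical([1, 2, np.nan], ordered=True)
+   >>> cat.min()
+   1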
@@ -387,7 +589,14 @@ consistent with the behaviour of :class:`DataFrame` and :class:`Index`.
DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
Series([], dtype: float64)
-.. _whatsnew_1000.api_breaking.deps:
+.. _whatsnew_100.api_breaking.python:
+
+Increased minimum version for Python
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Pandas 1.0.0 supports Python 3.6.1 and higher (:issue:`29212`).
+
+.. _whatsnew_100.api_breaking.deps:
Increased minimum versions for dependencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -428,9 +637,11 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| matplotlib | 2.2.2 | |
+-----------------+-----------------+---------+
+| numba | 0.46.0 | X |
++-----------------+-----------------+---------+
| openpyxl | 2.5.7 | X |
+-----------------+-----------------+---------+
-| pyarrow | 0.12.0 | X |
+| pyarrow | 0.13.0 | X |
+-----------------+-----------------+---------+
| pymysql | 0.7.1 | |
+-----------------+-----------------+---------+
@@ -453,14 +664,13 @@ Optional libraries below the lowest tested version may still work, but are not c
See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
-
-.. _whatsnew_1000.api.other:
+.. _whatsnew_100.api.other:
Other API changes
^^^^^^^^^^^^^^^^^
- Bumped the minimum supported version of ``s3fs`` from 0.0.8 to 0.3.0 (:issue:`28616`)
-- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`)
+- :class:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`)
- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix; see the example after this list (:issue:`27283`)
- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`)
- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``).
@@ -469,22 +679,25 @@ Other API changes
- Changed the default configuration value for ``options.matplotlib.register_converters`` from ``True`` to ``"auto"`` (:issue:`18720`).
Now, pandas custom formatters will only be applied to plots created by pandas, through :meth:`~DataFrame.plot`.
Previously, pandas' formatters would be applied to all plots created *after* a :meth:`~DataFrame.plot`.
- See :ref:`units registration ` for more.
+ See :ref:`units registration ` for more.
- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
Supplying anything other than ``how`` to ``**kwargs`` previously raised a ``TypeError`` (:issue:`29388`)
- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`).
+- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`)
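+
+For example, a quick illustration of the ``infer_dtype`` change noted above
+(``skipna=False`` is needed, since missing values are skipped by default):
+
+.. code-block:: python
+
+   >>> pd.api.types.infer_dtype([1, 2, np.nan], skipna=False)
+   'integer-na'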
-.. _whatsnew_1000.api.documentation:
+.. _whatsnew_100.api.documentation:
Documentation Improvements
^^^^^^^^^^^^^^^^^^^^^^^^^^
- Added new section on :ref:`scale` (:issue:`28315`).
-- Added sub-section Query MultiIndex in IO tools user guide (:issue:`28791`)
+- Added sub-section on :ref:`io.query_multi` for HDF5 datasets (:issue:`28791`).
-.. _whatsnew_1000.deprecations:
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_100.deprecations:
Deprecations
~~~~~~~~~~~~
@@ -495,31 +708,70 @@ Deprecations
is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`)
- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`)
-- :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`)
+- :meth:`DateOffset.isAnchored` and :meth:`DateOffset.onOffset` are deprecated and will be removed in a future version, use :meth:`DateOffset.is_anchored` and :meth:`DateOffset.is_on_offset` instead (:issue:`30340`)
+- ``pandas.tseries.frequencies.get_offset`` is deprecated and will be removed in a future version, use ``pandas.tseries.frequencies.to_offset`` instead (:issue:`4205`)
+- :meth:`Categorical.take_nd` and :meth:`CategoricalIndex.take_nd` are deprecated, use :meth:`Categorical.take` and :meth:`CategoricalIndex.take` instead (:issue:`27745`)
- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`)
- The parameter ``label`` in :func:`lreshape` has been deprecated and will be removed in a future version (:issue:`29742`)
- ``pandas.core.index`` has been deprecated and will be removed in a future version, the public classes are available in the top-level namespace (:issue:`19711`)
- :func:`pandas.json_normalize` is now exposed in the top-level namespace.
Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and
it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
+- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`).
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
+- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
+- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
+- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
+- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
+- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
+- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
+- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)
+
+**Selecting Columns from a Grouped DataFrame**
+
+When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated;
+a list of items should be used instead (:issue:`23566`). For example:
-.. _whatsnew_1000.prior_deprecations:
+.. code-block:: ipython
+
+ df = pd.DataFrame({
+ "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+ "B": np.random.randn(8),
+ "C": np.random.randn(8),
+ })
+ g = df.groupby('A')
+ # single key, returns SeriesGroupBy
+ g['B']
-Removed SparseSeries and SparseDataFrame
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # tuple of single key, returns SeriesGroupBy
+ g[('B',)]
+
+ # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning
+ g[('B', 'C')]
+
+ # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning
+ # (implicitly converts the passed strings into a single tuple)
+ g['B', 'C']
+
+ # proper way, returns DataFrameGroupBy
+ g[['B', 'C']]
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_100.prior_deprecations:
+
+Removal of prior version deprecations/changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Removed SparseSeries and SparseDataFrame**
``SparseSeries``, ``SparseDataFrame`` and the ``DataFrame.to_sparse`` method
have been removed (:issue:`28425`). We recommend using a ``Series`` or
``DataFrame`` with sparse values instead. See :ref:`sparse.migration` for help
with migrating existing code.
-Removal of prior version deprecations/changes
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. _whatsnew_1000.matplotlib_units:
+.. _whatsnew_100.matplotlib_units:
**Matplotlib unit registration**
@@ -540,121 +792,125 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
**Other removals**
-- Removed the previously deprecated "index" keyword from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`)
-- Removed the previously deprecated :meth:`StataReader.data` method, use :meth:`StataReader.read` instead (:issue:`9493`)
-- Removed the previously deprecated :func:`pandas.plotting._matplotlib.tsplot`, use :meth:`Series.plot` instead (:issue:`19980`)
-- :func:`pandas.tseries.converter.register` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`)
+- Removed the previously deprecated keyword "index" from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`)
+- Removed ``StataReader.data`` method, use :meth:`StataReader.read` instead (:issue:`9493`)
+- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`)
+- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`)
- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`)
- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allow ``figsize="default"``; specify the figure size by passing a tuple instead (:issue:`30003`)
- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`)
- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`)
-- :func:`pandas.api.types.infer_dtype` argument ``skipna`` defaults to ``True`` instead of ``False`` (:issue:`24050`)
-- Removed the previously deprecated :attr:`Series.ix` and :attr:`DataFrame.ix` (:issue:`26438`)
-- Removed the previously deprecated :meth:`Index.summary` (:issue:`18217`)
-- Removed the previously deprecated "fastpath" keyword from the :class:`Index` constructor (:issue:`23110`)
-- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
-- Removed the previously deprecated :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`)
-- Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
-- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
-- :func:`to_datetime` and :func:`to_timedelta` no longer accept "box" argument, always returns :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
+- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`)
+- Removed ``Series.ix`` and ``DataFrame.ix`` (:issue:`26438`)
+- Removed ``Index.summary`` (:issue:`18217`)
+- Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`)
+- Removed ``Series.get_value``, ``Series.set_value``, ``DataFrame.get_value``, ``DataFrame.set_value`` (:issue:`17739`)
+- Removed ``Series.compound`` and ``DataFrame.compound`` (:issue:`26405`)
+- Changed the default "inplace" argument in :meth:`DataFrame.set_index` and :meth:`Series.set_axis` from ``None`` to ``False`` (:issue:`27600`)
+- Removed ``Series.cat.categorical``, ``Series.cat.index``, ``Series.cat.name`` (:issue:`24751`)
+- Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now always returns :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
- :func:`to_timedelta`, :class:`Timedelta`, and :class:`TimedeltaIndex` no longer allow "M", "y", or "Y" for the "unit" argument (:issue:`23264`)
-- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
+- Removed the previously deprecated keyword "time_rule" from (non-public) ``offsets.generate_range``, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
-- :func:`concat` parameter "join_axes" has been removed, use ``reindex_like`` on the result instead (:issue:`22318`)
-- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
-- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
+- Removed the previously deprecated keyword "join_axes" from :func:`concat`; use ``reindex_like`` on the result instead (:issue:`22318`)
+- Removed the previously deprecated keyword "by" from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
+- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`18529`)
- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`)
- Passing ``int64`` values to :class:`DatetimeIndex` and a timezone now interprets the values as nanosecond timestamps in UTC, not wall times in the given timezone (:issue:`24559`)
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
-- Removed the previously deprecated :meth:`Index.contains`, use ``key in index`` instead (:issue:`30103`)
+- Removed ``Index.contains``, use ``key in index`` instead (:issue:`30103`)
- Addition and subtraction of ``int`` or integer-arrays is no longer allowed in :class:`Timestamp`, :class:`DatetimeIndex`, :class:`TimedeltaIndex`, use ``obj + n * obj.freq`` instead of ``obj + n`` (:issue:`22535`)
-- Removed :meth:`Series.from_array` (:issue:`18258`)
-- Removed :meth:`DataFrame.from_items` (:issue:`18458`)
-- Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)
-- Removed :meth:`Series.asobject` (:issue:`18477`)
-- Removed :meth:`DataFrame.as_blocks`, :meth:`Series.as_blocks`, `DataFrame.blocks`, :meth:`Series.blocks` (:issue:`17656`)
+- Removed ``Series.ptp`` (:issue:`21614`)
+- Removed ``Series.from_array`` (:issue:`18258`)
+- Removed ``DataFrame.from_items`` (:issue:`18458`)
+- Removed ``DataFrame.as_matrix``, ``Series.as_matrix`` (:issue:`18458`)
+- Removed ``Series.asobject`` (:issue:`18477`)
+- Removed ``DataFrame.as_blocks``, ``Series.as_blocks``, ``DataFrame.blocks``, ``Series.blocks`` (:issue:`17656`)
- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`)
- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`)
-- :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors no longer allow ``start``, ``end``, and ``periods`` keywords, use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`)
-- :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors no longer have a ``verify_integrity`` keyword argument (:issue:`23919`)
-- ``pandas.core.internals.blocks.make_block`` no longer accepts the "fastpath" keyword(:issue:`19265`)
-- :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`)
-- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
-- Removed the previously deprecated :meth:`MultiIndex.to_hierarchical` (:issue:`21613`)
-- Removed the previously deprecated :attr:`MultiIndex.labels`, use :attr:`MultiIndex.codes` instead (:issue:`23752`)
-- Removed the previously deprecated "labels" keyword from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`)
-- Removed the previously deprecated :meth:`MultiIndex.set_labels`, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`)
-- Removed the previously deprecated "labels" keyword from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`)
+- Removed the previously deprecated keywords "start", "end", and "periods" from the :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors; use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`)
+- Removed the previously deprecated keyword "verify_integrity" from the :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors (:issue:`23919`)
+- Removed the previously deprecated keyword "fastpath" from ``pandas.core.internals.blocks.make_block`` (:issue:`19265`)
+- Removed the previously deprecated keyword "dtype" from :meth:`Block.make_block_same_class` (:issue:`19434`)
+- Removed ``ExtensionArray._formatting_values``. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
+- Removed ``MultiIndex.to_hierarchical`` (:issue:`21613`)
+- Removed ``MultiIndex.labels``, use :attr:`MultiIndex.codes` instead (:issue:`23752`)
+- Removed the previously deprecated keyword "labels" from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`)
+- Removed ``MultiIndex.set_labels``, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`)
+- Removed the previously deprecated keyword "labels" from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`)
- Removed support for legacy HDF5 formats (:issue:`29787`)
- Passing a dtype alias (e.g. 'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`)
-- :func:`read_excel` removed support for "skip_footer" argument, use "skipfooter" instead (:issue:`18836`)
+- Removed the previously deprecated keyword "skip_footer" from :func:`read_excel`; use "skipfooter" instead (:issue:`18836`)
- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`)
-- :meth:`DataFrame.to_records` no longer supports the argument "convert_datetime64" (:issue:`18902`)
-- Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
-- Changed the default value for the "keep_tz" argument in :meth:`DatetimeIndex.to_series` to ``True`` (:issue:`23739`)
-- Removed the previously deprecated :func:`api.types.is_period` and :func:`api.types.is_datetimetz` (:issue:`23917`)
+- Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`)
+- Removed ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
+- Changed the default "keep_tz" argument in :meth:`DatetimeIndex.to_series` from ``None`` to ``True`` (:issue:`23739`)
+- Removed ``api.types.is_period`` and ``api.types.is_datetimetz`` (:issue:`23917`)
- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`)
-- Removed previously deprecated :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`)
-- Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`)
-- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`)
+- Removed ``pandas.tseries.plotting.tsplot`` (:issue:`18627`)
+- Removed the previously deprecated keywords "reduce" and "broadcast" from :meth:`DataFrame.apply` (:issue:`18577`)
+- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas._testing`` (:issue:`29174`)
- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`)
-- Removed previously deprecated "nthreads" argument from :func:`read_feather`, use "use_threads" instead (:issue:`23053`)
-- Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`)
-- Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`)
-- Removed the previously deprecated :meth:`Series.valid`; use :meth:`Series.dropna` instead (:issue:`18800`)
-- Removed the previously properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
-- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
-- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
-- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
-- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
+- Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`)
+- Removed ``Index.is_lexsorted_for_tuple`` (:issue:`29305`)
+- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`29608`)
+- Removed ``Series.valid``; use :meth:`Series.dropna` instead (:issue:`18800`)
+- Removed ``DataFrame.is_copy``, ``Series.is_copy`` (:issue:`18812`)
+- Removed ``DataFrame.get_ftype_counts``, ``Series.get_ftype_counts`` (:issue:`18243`)
+- Removed ``DataFrame.ftypes``, ``Series.ftypes``, ``Series.ftype`` (:issue:`26744`)
+- Removed ``Index.get_duplicates``, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
+- Removed ``Series.clip_upper``, ``Series.clip_lower``, ``DataFrame.clip_upper``, ``DataFrame.clip_lower`` (:issue:`24203`)
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
-- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
-- Removed the previously deprecated :meth:`DatetimeIndex.asobject`, :meth:`TimedeltaIndex.asobject`, :meth:`PeriodIndex.asobject`, use ``astype(object)`` instead (:issue:`29801`)
-- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
-- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer supports the "encoding" argument (:issue:`21400`)
-- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`)
-- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
-- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
-- Removed previously deprecated keywords ``how``, ``fill_method``, and ``limit`` from :meth:`DataFrame.resample` (:issue:`30139`)
+- Removed ``DatetimeIndex.offset`` (:issue:`20730`)
+- Removed ``DatetimeIndex.asobject``, ``TimedeltaIndex.asobject``, ``PeriodIndex.asobject``, use ``astype(object)`` instead (:issue:`29801`)
+- Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`)
+- Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`)
+- Changed the default "sort" argument in :func:`concat` from ``None`` to ``False`` (:issue:`20613`)
+- Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
+- Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
+- Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`)
- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`)
- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`)
-- Removed previously deprecated :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`)
+- Removed ``Series.nonzero``, use ``to_numpy().nonzero()`` instead (:issue:`24048`)
- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`)
-- :meth:`Series.str.partition` and :meth:`Series.str.rpartition` no longer accept "pat" keyword, use "sep" instead (:issue:`23767`)
-- Removed the previously deprecated :meth:`Series.put` (:issue:`27106`)
-- Removed the previously deprecated :attr:`Series.real`, :attr:`Series.imag` (:issue:`27106`)
-- Removed the previously deprecated :meth:`Series.to_dense`, :meth:`DataFrame.to_dense` (:issue:`26684`)
-- Removed the previously deprecated :meth:`Index.dtype_str`, use ``str(index.dtype)`` instead (:issue:`27106`)
+- Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`)
+- Removed ``Series.put`` (:issue:`27106`)
+- Removed ``Series.real``, ``Series.imag`` (:issue:`27106`)
+- Removed ``Series.to_dense``, ``DataFrame.to_dense`` (:issue:`26684`)
+- Removed ``Index.dtype_str``, use ``str(index.dtype)`` instead (:issue:`27106`)
- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`)
- The 'outer' method on Numpy ufuncs, e.g. ``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`)
-- Removed previously deprecated :meth:`Series.get_dtype_counts` and :meth:`DataFrame.get_dtype_counts` (:issue:`27145`)
-- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`)
-- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `,
-- :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`)
+- Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`)
+- Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`)
+- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`)
- Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`)
- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`)
-- Removed the previously deprecated :attr:`Series.base`, :attr:`Index.base`, :attr:`Categorical.base`, :attr:`Series.flags`, :attr:`Index.flags`, :attr:`PeriodArray.flags`, :attr:`Series.strides`, :attr:`Index.strides`, :attr:`Series.itemsize`, :attr:`Index.itemsize`, :attr:`Series.data`, :attr:`Index.data` (:issue:`20721`)
+- Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`)
- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`)
-- Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`)
-- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
-- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
+- Removed ``Timestamp.weekday_name``, ``DatetimeIndex.weekday_name``, and ``Series.dt.weekday_name`` (:issue:`18164`)
+- Removed the previously deprecated keyword "errors" in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
+- Changed the default "ordered" argument in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`)
-- Removed the previously deprecated :func:`to_msgpack`, :func:`read_msgpack`, :meth:`DataFrame.to_msgpack`, :meth:`Series.to_msgpack` (:issue:`27103`)
--
+- Removed ``to_msgpack``, ``read_msgpack``, ``DataFrame.to_msgpack``, ``Series.to_msgpack`` (:issue:`27103`)
+- Removed ``Series.compress`` (:issue:`21930`)
- Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`)
- Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`)
- Removed the previously deprecated keyword "data" from :func:`parallel_coordinates`, use "frame" instead (:issue:`6956`)
- Removed the previously deprecated keyword "colors" from :func:`parallel_coordinates`, use "color" instead (:issue:`6956`)
- Removed the previously deprecated keywords "verbose" and "private_key" from :func:`read_gbq` (:issue:`30200`)
+- Calling ``np.array`` and ``np.asarray`` on tz-aware :class:`Series` and :class:`DatetimeIndex` will now return an object array of tz-aware :class:`Timestamp` (:issue:`24596`)
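A sketch of the conversion change above (assuming pandas 1.0; the commented output is illustrative)::

    import numpy as np
    import pandas as pd

    ser = pd.Series(pd.date_range("2000", periods=2, tz="CET"))

    # The conversion now preserves the timezone by returning an
    # object-dtype array of tz-aware Timestamps.
    np.asarray(ser)
    # array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
    #        Timestamp('2000-01-02 00:00:00+0100', tz='CET')], dtype=object)

    # The previous tz-naive datetime64 values remain available explicitly:
    np.asarray(ser, dtype="datetime64[ns]")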
-
-.. _whatsnew_1000.performance:
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_100.performance:
Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`)
- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`)
- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`)
- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`)
@@ -669,7 +925,9 @@ Performance improvements
- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`)
- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
-.. _whatsnew_1000.bug_fixes:
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_100.bug_fixes:
Bug fixes
~~~~~~~~~
@@ -691,6 +949,11 @@ Categorical
:class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`)
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`)
- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`)
+- The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)``, as illustrated in the sketch after this list (:issue:`17605`)
+ * :meth:`core.groupby.SeriesGroupBy.count`
+ * :meth:`core.groupby.SeriesGroupBy.size`
+ * :meth:`core.groupby.SeriesGroupBy.nunique`
+ * :meth:`core.groupby.SeriesGroupBy.nth`
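A minimal sketch of the fixed behavior for one of the methods above (assuming pandas 1.0; the commented output is illustrative)::

    import pandas as pd

    df = pd.DataFrame(
        {
            "key": pd.Categorical(["a", "a"], categories=["a", "b"]),
            "val": [1, 2],
        }
    )

    # The unobserved category "b" now appears in the result with a count of 0.
    df.groupby("key", observed=False)["val"].count()
    # key
    # a    2
    # b    0
    # Name: val, dtype: int64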
Datetimelike
@@ -699,22 +962,31 @@ Datetimelike
- Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`)
- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`)
- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with errors="coerce" could incorrectly lead to raising ``ValueError`` (:issue:`28299`)
-- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`)
+- Bug in :meth:`core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`)
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
+- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
-- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
+- Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
- Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`)
- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`)
- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`)
+- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`)
- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`)
- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`)
- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)
+- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`)
- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`)
+- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`)
+- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
+- Bug in :func:`pandas.to_datetime` when called with a ``Series`` storing an ``IntegerArray`` raising ``TypeError`` instead of returning a ``Series`` (:issue:`30050`)
+- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`)
+- Bug in :class:`PeriodIndex` comparisons incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`)
+- Bug in :meth:`DatetimeIndex.insert` raising a ``ValueError`` instead of a ``TypeError`` when trying to insert a timezone-aware :class:`Timestamp` into a timezone-naive :class:`DatetimeIndex`, or vice-versa (:issue:`30806`)
Timedelta
^^^^^^^^^
@@ -743,6 +1015,8 @@ Numeric
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`)
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
+- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`)
+- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrectly casting to object-dtype (:issue:`19296`)
Conversion
^^^^^^^^^^
@@ -753,7 +1027,7 @@ Conversion
Strings
^^^^^^^
-- Calling :meth:`Series.str.isalnum` (and other "ismethods") on an empty Series would return an object dtype instead of bool (:issue:`29624`)
+- Calling :meth:`Series.str.isalnum` (and other "ismethods") on an empty ``Series`` would return an ``object`` dtype instead of ``bool`` (:issue:`29624`)
-
@@ -762,6 +1036,9 @@ Interval
- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`)
+- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`)
+- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`)
+- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where equality comparisons were incorrect (:issue:`24112`)
Indexing
^^^^^^^^
@@ -773,8 +1050,11 @@ Indexing
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`)
- Bug where indexing with ``.loc`` did not work when the index was a :class:`CategoricalIndex` with non-string categories (:issue:`17569`, :issue:`30225`)
-- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
+- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
+- Bug in :meth:`MultiIndex.get_loc` failing to find missing values when the input includes missing values (:issue:`19132`)
+- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
+- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years; use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`)
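A short sketch of the :class:`PeriodIndex` indexing change above (assuming pandas 1.0)::

    import pandas as pd

    ser = pd.Series(range(3), index=pd.period_range("2007", periods=3, freq="A"))

    ser.loc["2007"]  # string-based lookup still works
    ser.loc[2007]    # now raises; the integer is no longer treated as a year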
Missing
^^^^^^^
@@ -785,8 +1065,8 @@ Missing
MultiIndex
^^^^^^^^^^
-- Constructior for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`)
--
+- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`)
+- ``Series.drop`` and ``MultiIndex.drop`` with a ``MultiIndex`` now raise an exception if the labels are not present in the given level (:issue:`8594`)
-
I/O
@@ -810,41 +1090,47 @@ I/O
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)
+- :func:`read_excel` now accepts binary data (:issue:`15914`)
+- Bug in :meth:`read_csv` in which encoding handling was limited to just the string `utf-16` for the C engine (:issue:`24130`)
Plotting
^^^^^^^^
- Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`)
--
- Bug in :meth:`DataFrame.plot` not able to plot when no rows (:issue:`27758`)
- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`)
- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`)
- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`)
-- Bug where :meth:`DataFrame.boxplot` would not accept a `color` parameter like `DataFrame.plot.box` (:issue:`26214`)
+- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`)
- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`)
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
-- :meth:`DataFrame.plot` now allow a ``backend`` keyword arugment to allow changing between backends in one session (:issue:`28619`).
+- :meth:`DataFrame.plot` now allow a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
+- Allow :meth:`DataFrame.plot.scatter` to plot ``object`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
+- Bug in :meth:`DataFrame.hist` where ``xrot=0`` did not work with ``by`` and subplots (:issue:`30288`).
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
--
+- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
-- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
+- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`)
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
-- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
+- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` not able to use a lambda function with named aggregation (:issue:`27519`)
- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
- Removed the error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby`. Previously an error would be raised if the same function was applied to the same column; this is now allowed if the newly assigned names are different. (:issue:`28426`)
-- :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479)
-- Bug in :meth:`DataFrameGroupBy.rolling().quantile()` ignoring ``interpolation`` keyword argument (:issue:`28779`)
+- :meth:`core.groupby.SeriesGroupBy.value_counts` can now handle the case when the :class:`Grouper` makes empty groups (:issue:`28479`)
+- Bug in :meth:`core.window.rolling.Rolling.quantile` ignoring ``interpolation`` keyword argument when used within a groupby (:issue:`28779`)
- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`)
-- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`)
+- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`)
- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
+- Bug in :meth:`GroupBy.quantile` with multiple list-like ``q`` values and integer column names (:issue:`30289`)
+- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causing ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
Reshaping
^^^^^^^^^
@@ -857,17 +1143,20 @@ Reshaping
- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`)
-- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`)
+- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`)
+- Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`)
- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
- Bug in :meth:`DataFrame.replace` that caused a non-numeric replacer's dtype to not be respected (:issue:`26632`)
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
+- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
--
+- Improved error message and docstring in :func:`cut` and :func:`qcut` when ``labels=True`` (:issue:`13318`)
+- Bug where the ``fill_na`` parameter was missing from :meth:`DataFrame.unstack` with a list of levels (:issue:`30740`)
Sparse
^^^^^^
- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`)
--
+- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`)
-
ExtensionArray
@@ -875,7 +1164,7 @@ ExtensionArray
- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`).
- Bug where nullable integers could not be compared to strings (:issue:`28930`)
-- Bug where :class:`DataFrame` constructor raised ValueError with list-like data and ``dtype`` specified (:issue:`30280`)
+- Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`)
Other
@@ -886,15 +1175,26 @@ Other
- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
+- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used for otherwise invalid identifiers, such as names that start with a digit, are Python keywords, or contain single-character operators; see the sketch after this list. (:issue:`27017`)
+- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`)
- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`)
- Fix :class:`AbstractHolidayCalendar` to return correct results for
years after 2030 (now goes up to 2200) (:issue:`27790`)
-- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
-- Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
+- Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`)
+- Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
- Bug in :meth:`Series.count` raising if ``use_inf_as_na`` is enabled (:issue:`29478`)
+- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
+- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
+- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
+- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
+- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`)
+- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`)
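A sketch of the backtick-quoting entry above (assuming pandas 1.0; the column names are made up for illustration)::

    import pandas as pd

    df = pd.DataFrame({"1st col": [1, 2], "class": [3, 4]})

    # Backticks allow referencing names that are not valid Python
    # identifiers, such as digit-leading names and keywords.
    df.query("`1st col` > 1 and `class` < 5")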
+.. ---------------------------------------------------------------------------
-.. _whatsnew_1000.contributors:
+.. _whatsnew_100.contributors:
Contributors
~~~~~~~~~~~~
+
+.. contributors:: v0.25.3..v1.0.0rc0
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
new file mode 100644
index 0000000000000..01c089b46b4a1
--- /dev/null
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -0,0 +1,172 @@
+.. _whatsnew_110:
+
+What's new in 1.1.0 (??)
+------------------------
+
+These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+
+Enhancements
+~~~~~~~~~~~~
+
+.. _whatsnew_110.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+
+- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
+-
+-
+
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_110.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+
+
+.. _whatsnew_110.performance:
+
+Performance improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_110.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+
+Categorical
+^^^^^^^^^^^
+
+-
+-
+
+Datetimelike
+^^^^^^^^^^^^
+- Bug in :class:`Timestamp` where constructing a :class:`Timestamp` from ambiguous epoch time and calling the constructor again changed the :meth:`Timestamp.value` property (:issue:`24329`)
+- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`)
+-
+
+Timedelta
+^^^^^^^^^
+
+-
+-
+
+Timezones
+^^^^^^^^^
+
+-
+-
+
+
+Numeric
+^^^^^^^
+-
+-
+
+Conversion
+^^^^^^^^^^
+- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`)
+-
+-
+
+Strings
+^^^^^^^
+
+-
+-
+
+
+Interval
+^^^^^^^^
+
+-
+-
+
+Indexing
+^^^^^^^^
+- Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`)
+-
+-
+
+Missing
+^^^^^^^
+
+-
+-
+
+MultiIndex
+^^^^^^^^^^
+
+-
+-
+
+I/O
+^^^
+
+-
+-
+
+Plotting
+^^^^^^^^
+
+-
+-
+
+Groupby/resample/rolling
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Bug in :meth:`GroupBy.apply` raising ``ValueError`` when the ``by`` axis is not sorted, has duplicates, and the applied ``func`` does not mutate the passed objects (:issue:`30667`)
+
+Reshaping
+^^^^^^^^^
+
+-
+- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns are set (:issue:`17038`)
+- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
+- Bug in :func:`crosstab` when the inputs are two Series with tuple names, where the output would keep a dummy MultiIndex as columns (:issue:`18321`)
+
+
+Sparse
+^^^^^^
+
+-
+-
+
+ExtensionArray
+^^^^^^^^^^^^^^
+
+-
+-
+
+
+Other
+^^^^^
+- Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True``
+  instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name``; see the sketch after this list (:issue:`30871`)
+-
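A sketch of the append-a-dict entry above (assuming pandas 1.1; the error text is quoted from the entry)::

    import pandas as pd

    df = pd.DataFrame({"a": [1]})

    df.append({"a": 2}, ignore_index=True)  # works
    df.append({"a": 2})
    # TypeError: Can only append a dict if ignore_index=True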
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_110.contributors:
+
+Contributors
+~~~~~~~~~~~~
diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py
index fdc5a6b283ba8..f394aac5c545b 100755
--- a/doc/sphinxext/announce.py
+++ b/doc/sphinxext/announce.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# -*- encoding:utf-8 -*-
"""
Script to generate contributor and pull request lists
diff --git a/environment.yml b/environment.yml
index 2b171d097a693..5f1184e921119 100644
--- a/environment.yml
+++ b/environment.yml
@@ -27,13 +27,13 @@ dependencies:
# documentation
- gitpython # obtain contributors from git for whatsnew
- sphinx
- - numpydoc>=0.9.0
# documentation (jupyter notebooks)
- nbconvert>=5.4.1
- nbsphinx
- pandoc
- # Dask and its dependencies
+
+  # Dask and its dependencies (that don't install with dask)
- dask-core
- toolz>=0.7.3
- fsspec>=0.5.1
@@ -54,6 +54,9 @@ dependencies:
- pytest>=5.0.1
- pytest-cov
- pytest-xdist>=1.21
+ - pytest-asyncio
+
+ # downstream tests
- seaborn
- statsmodels
@@ -67,29 +70,38 @@ dependencies:
- blosc
- bottleneck>=1.2.1
- ipykernel
- - ipython>=5.6.0
+ - ipython>=7.11.1
- jinja2 # pandas.Styler
- matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot
- numexpr>=2.6.8
- scipy>=1.1
+ - numba>=0.46.0
# optional for io
- - beautifulsoup4>=4.6.0 # pandas.read_html
+ # ---------------
+ # pd.read_html
+ - beautifulsoup4>=4.6.0
+ - html5lib
+ - lxml
+
+ # pd.read_excel, DataFrame.to_excel, pd.ExcelWriter, pd.ExcelFile
+ - openpyxl<=3.0.1
+ - xlrd
+ - xlsxwriter
+ - xlwt
+ - odfpy
+
- fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet
- - html5lib # pandas.read_html
- - lxml # pandas.read_html
- - openpyxl<=3.0.1 # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
- pyarrow>=0.13.1 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
+ - python-snappy # required by pyarrow
+
- pyqt>=5.9.2 # pandas.read_clipboard
- pytables>=3.4.2 # pandas.read_hdf, DataFrame.to_hdf
- - python-snappy # required by pyarrow
- s3fs # pandas.read_csv... when using 's3://...' path
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
- xarray # DataFrame.to_xarray
- - xlrd # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
- - xlsxwriter # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
- - xlwt # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
- - odfpy # pandas.read_excel
- pyreadstat # pandas.read_spss
+ - tabulate>=0.8.3 # DataFrame.to_markdown
- pip:
- git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
+ - git+https://github.com/numpy/numpydoc
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 30b7e5bafe1df..d526531b159b2 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -10,7 +10,7 @@
try:
__import__(dependency)
except ImportError as e:
- missing_dependencies.append("{0}: {1}".format(dependency, str(e)))
+ missing_dependencies.append(f"{dependency}: {e}")
if missing_dependencies:
raise ImportError(
@@ -33,14 +33,11 @@
# hack but overkill to use re
module = str(e).replace("cannot import name ", "")
raise ImportError(
- "C extension: {0} not built. If you want to import "
+ f"C extension: {module} not built. If you want to import "
"pandas from the source directory, you may need to run "
- "'python setup.py build_ext --inplace --force' to build "
- "the C extensions first.".format(module)
+ "'python setup.py build_ext --inplace --force' to build the C extensions first."
)
-from datetime import datetime
-
from pandas._config import (
get_option,
set_option,
@@ -105,7 +102,6 @@
to_datetime,
to_timedelta,
# misc
- np,
Grouper,
factorize,
unique,
@@ -118,7 +114,7 @@
DataFrame,
)
-from pandas.core.arrays.sparse import SparseArray, SparseDtype
+from pandas.core.arrays.sparse import SparseDtype
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets
@@ -141,6 +137,7 @@
qcut,
)
+import pandas.api
from pandas.util._print_versions import show_versions
from pandas.io.api import (
@@ -189,7 +186,6 @@
__git_version__ = v.get("full-revisionid")
del get_versions, v
-
# GH 27101
# TODO: remove Panel compat in 1.0
if pandas.compat.PY37:
@@ -201,8 +197,7 @@ def __getattr__(name):
warnings.warn(
"The Panel class is removed from pandas. Accessing it "
- "from the top-level namespace will also be removed in "
- "the next version",
+ "from the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
@@ -211,18 +206,57 @@ class Panel:
pass
return Panel
+
+ elif name == "datetime":
+ warnings.warn(
+ "The pandas.datetime class is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Import from datetime module instead.",
+ FutureWarning,
+ stacklevel=2,
+ )
+
+ from datetime import datetime as dt
+
+ return dt
+
+ elif name == "np":
+
+ warnings.warn(
+ "The pandas.np module is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Import numpy directly instead",
+ FutureWarning,
+ stacklevel=2,
+ )
+ import numpy as np
+
+ return np
+
elif name in {"SparseSeries", "SparseDataFrame"}:
warnings.warn(
- "The {} class is removed from pandas. Accessing it from "
- "the top-level namespace will also be removed in the next "
- "version".format(name),
+ f"The {name} class is removed from pandas. Accessing it from "
+ "the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
return type(name, (), {})
- raise AttributeError("module 'pandas' has no attribute '{}'".format(name))
+ elif name == "SparseArray":
+
+ warnings.warn(
+ "The pandas.SparseArray class is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Use pandas.arrays.SparseArray instead.",
+ FutureWarning,
+ stacklevel=2,
+ )
+ from pandas.core.arrays.sparse import SparseArray as _SparseArray
+
+ return _SparseArray
+
+ raise AttributeError(f"module 'pandas' has no attribute '{name}'")
else:
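For readers unfamiliar with the pattern above: this branch relies on the module-level ``__getattr__`` hook (PEP 562, Python 3.7+) to resolve ``pd.np``, ``pd.datetime`` and ``pd.SparseArray`` lazily while emitting a ``FutureWarning``. A standalone sketch of the same idea, with made-up module and attribute names::

    # mymodule.py -- illustrative, not pandas API
    import warnings


    def __getattr__(name):
        if name == "np":
            warnings.warn(
                "mymodule.np is deprecated and will be removed in a "
                "future version. Import numpy directly instead",
                FutureWarning,
                stacklevel=2,
            )
            import numpy as np

            return np

        raise AttributeError(f"module 'mymodule' has no attribute '{name}'")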
@@ -236,6 +270,96 @@ class SparseDataFrame:
class SparseSeries:
pass
+ class __numpy:
+ def __init__(self):
+ import numpy as np
+ import warnings
+
+ self.np = np
+ self.warnings = warnings
+
+ def __getattr__(self, item):
+ self.warnings.warn(
+ "The pandas.np module is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Import numpy directly instead",
+ FutureWarning,
+ stacklevel=2,
+ )
+
+ try:
+ return getattr(self.np, item)
+ except AttributeError:
+ raise AttributeError(f"module numpy has no attribute {item}")
+
+ np = __numpy()
+
+ class __Datetime(type):
+
+ from datetime import datetime as dt
+
+ datetime = dt
+
+ def __getattr__(cls, item):
+ cls.emit_warning()
+
+ try:
+ return getattr(cls.datetime, item)
+ except AttributeError:
+ raise AttributeError(f"module datetime has no attribute {item}")
+
+ def __instancecheck__(cls, other):
+ return isinstance(other, cls.datetime)
+
+ class __DatetimeSub(metaclass=__Datetime):
+ def emit_warning(dummy=0):
+ import warnings
+
+ warnings.warn(
+ "The pandas.datetime class is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Import from datetime instead.",
+ FutureWarning,
+ stacklevel=3,
+ )
+
+ def __new__(cls, *args, **kwargs):
+ cls.emit_warning()
+ from datetime import datetime as dt
+
+ return dt(*args, **kwargs)
+
+ datetime = __DatetimeSub
+
+ class __SparseArray(type):
+
+ from pandas.core.arrays.sparse import SparseArray as sa
+
+ SparseArray = sa
+
+ def __instancecheck__(cls, other):
+ return isinstance(other, cls.SparseArray)
+
+ class __SparseArraySub(metaclass=__SparseArray):
+ def emit_warning(dummy=0):
+ import warnings
+
+ warnings.warn(
+ "The pandas.SparseArray class is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Use pandas.arrays.SparseArray instead.",
+ FutureWarning,
+ stacklevel=3,
+ )
+
+ def __new__(cls, *args, **kwargs):
+ cls.emit_warning()
+ from pandas.core.arrays.sparse import SparseArray as sa
+
+ return sa(*args, **kwargs)
+
+ SparseArray = __SparseArraySub
+
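On interpreters without PEP 562 (here, Python < 3.7), the diff instead installs class-based stand-ins whose metaclass forwards attribute access and ``isinstance`` checks to the real object while emitting the warning. A self-contained sketch of that pattern, with illustrative names::

    import warnings
    from datetime import datetime as _real_datetime


    class _DatetimeMeta(type):
        # Forward class-attribute access and isinstance checks to the
        # wrapped class, warning on each attribute lookup.
        def __getattr__(cls, item):
            cls.emit_warning()
            return getattr(_real_datetime, item)

        def __instancecheck__(cls, other):
            return isinstance(other, _real_datetime)


    class datetime(metaclass=_DatetimeMeta):
        @staticmethod
        def emit_warning():
            warnings.warn("this alias is deprecated", FutureWarning, stacklevel=3)

        def __new__(cls, *args, **kwargs):
            cls.emit_warning()
            return _real_datetime(*args, **kwargs)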
# module level doc-string
__doc__ = """
diff --git a/pandas/_config/config.py b/pandas/_config/config.py
index 6844df495547a..cacd6f5454de7 100644
--- a/pandas/_config/config.py
+++ b/pandas/_config/config.py
@@ -51,7 +51,18 @@
from collections import namedtuple
from contextlib import contextmanager
import re
-from typing import Any, Dict, Iterable, List
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Type,
+ TypeVar,
+ cast,
+)
import warnings
DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver")
@@ -80,7 +91,7 @@ class OptionError(AttributeError, KeyError):
# User API
-def _get_single_key(pat, silent):
+def _get_single_key(pat: str, silent: bool) -> str:
keys = _select_options(pat)
if len(keys) == 0:
if not silent:
@@ -98,7 +109,7 @@ def _get_single_key(pat, silent):
return key
-def _get_option(pat, silent=False):
+def _get_option(pat: str, silent: bool = False):
key = _get_single_key(pat, silent)
# walk the nested dict
@@ -106,7 +117,7 @@ def _get_option(pat, silent=False):
return root[k]
-def _set_option(*args, **kwargs):
+def _set_option(*args, **kwargs) -> None:
# must at least 1 arg deal with constraints later
nargs = len(args)
if not nargs or nargs % 2 != 0:
@@ -138,7 +149,7 @@ def _set_option(*args, **kwargs):
o.cb(key)
-def _describe_option(pat="", _print_desc=True):
+def _describe_option(pat: str = "", _print_desc: bool = True):
keys = _select_options(pat)
if len(keys) == 0:
@@ -154,7 +165,7 @@ def _describe_option(pat="", _print_desc=True):
return s
-def _reset_option(pat, silent=False):
+def _reset_option(pat: str, silent: bool = False) -> None:
keys = _select_options(pat)
@@ -165,15 +176,14 @@ def _reset_option(pat, silent=False):
raise ValueError(
"You must specify at least 4 characters when "
"resetting multiple keys, use the special keyword "
- '"all" to reset all the options to their default '
- "value"
+ '"all" to reset all the options to their default value'
)
for k in keys:
_set_option(k, _registered_options[k].defval, silent=silent)
-def get_default_val(pat):
+def get_default_val(pat: str):
key = _get_single_key(pat, silent=True)
return _get_registered_option(key).defval
@@ -181,11 +191,11 @@ def get_default_val(pat):
class DictWrapper:
""" provide attribute-style access to a nested dict"""
- def __init__(self, d, prefix=""):
+ def __init__(self, d: Dict[str, Any], prefix: str = ""):
object.__setattr__(self, "d", d)
object.__setattr__(self, "prefix", prefix)
- def __setattr__(self, key, val):
+ def __setattr__(self, key: str, val: Any) -> None:
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
@@ -197,7 +207,7 @@ def __setattr__(self, key, val):
else:
raise OptionError("You can only set the value of existing options")
- def __getattr__(self, key):
+ def __getattr__(self, key: str):
prefix = object.__getattribute__(self, "prefix")
if prefix:
prefix += "."
@@ -211,7 +221,7 @@ def __getattr__(self, key):
else:
return _get_option(prefix)
- def __dir__(self):
+ def __dir__(self) -> Iterable[str]:
return list(self.d.keys())
@@ -412,23 +422,31 @@ def __exit__(self, *args):
_set_option(pat, val, silent=True)
-def register_option(key: str, defval: object, doc="", validator=None, cb=None):
- """Register an option in the package-wide pandas config object
+def register_option(
+ key: str,
+ defval: object,
+ doc: str = "",
+ validator: Optional[Callable[[Any], Any]] = None,
+ cb: Optional[Callable[[str], Any]] = None,
+) -> None:
+ """
+ Register an option in the package-wide pandas config object
Parameters
----------
- key - a fully-qualified key, e.g. "x.y.option - z".
- defval - the default value of the option
- doc - a string description of the option
- validator - a function of a single argument, should raise `ValueError` if
- called with a value which is not a legal value for the option.
- cb - a function of a single argument "key", which is called
- immediately after an option value is set/reset. key is
- the full name of the option.
-
- Returns
- -------
- Nothing.
+ key : str
+ Fully-qualified key, e.g. "x.y.option - z".
+ defval : object
+ Default value of the option.
+ doc : str
+ Description of the option.
+ validator : Callable, optional
+ Function of a single argument, should raise `ValueError` if
+ called with a value which is not a legal value for the option.
+    cb : Callable, optional
+        Function of a single argument "key", which is called
+        immediately after an option value is set/reset. key is
+        the full name of the option.
Raises
------
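A usage sketch of the API documented above (the ``example.width`` key is hypothetical)::

    from pandas._config import config as cf

    cf.register_option(
        "example.width",
        80,
        doc="Illustrative line-width option.",
        validator=cf.is_type_factory(int),
    )

    cf.get_option("example.width")       # 80
    cf.set_option("example.width", 100)  # OK
    cf.set_option("example.width", "x")  # ValueError raised by the validator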
@@ -481,7 +499,9 @@ def register_option(key: str, defval: object, doc="", validator=None, cb=None):
)
-def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
+def deprecate_option(
+ key: str, msg: Optional[str] = None, rkey: Optional[str] = None, removal_ver=None
+) -> None:
"""
Mark option `key` as deprecated, if code attempts to access this option,
a warning will be produced, using `msg` if given, or a default message
@@ -494,32 +514,27 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
Parameters
----------
- key - the name of the option to be deprecated. must be a fully-qualified
- option name (e.g "x.y.z.rkey").
-
- msg - (Optional) a warning message to output when the key is referenced.
- if no message is given a default message will be emitted.
-
- rkey - (Optional) the name of an option to reroute access to.
- If specified, any referenced `key` will be re-routed to `rkey`
- including set/get/reset.
- rkey must be a fully-qualified option name (e.g "x.y.z.rkey").
- used by the default message if no `msg` is specified.
-
- removal_ver - (Optional) specifies the version in which this option will
- be removed. used by the default message if no `msg`
- is specified.
-
- Returns
- -------
- Nothing
+    key : str
+        Name of the option to be deprecated.
+        Must be a fully-qualified option name (e.g "x.y.z.rkey").
+    msg : str, optional
+        Warning message to output when the key is referenced.
+        If no message is given a default message will be emitted.
+    rkey : str, optional
+        Name of an option to reroute access to.
+        If specified, any referenced `key` will be
+        re-routed to `rkey` including set/get/reset.
+        rkey must be a fully-qualified option name (e.g "x.y.z.rkey").
+        Used by the default message if no `msg` is specified.
+    removal_ver : optional
+        Specifies the version in which this option will
+        be removed. Used by the default message if no `msg` is specified.
Raises
------
- OptionError - if key has already been deprecated.
-
+ OptionError
+ If the specified key has already been deprecated.
"""
-
key = key.lower()
if key in _deprecated_options:
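A usage sketch of ``deprecate_option`` with rerouting (both keys are hypothetical)::

    from pandas._config import config as cf

    cf.register_option("example.old_key", 1)
    cf.register_option("example.new_key", 1)
    cf.deprecate_option("example.old_key", rkey="example.new_key")

    # Emits a FutureWarning and reroutes the access to "example.new_key".
    cf.get_option("example.old_key")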
@@ -532,7 +547,7 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
# functions internal to the module
-def _select_options(pat):
+def _select_options(pat: str) -> List[str]:
"""returns a list of keys matching `pat`
if pat=="all", returns all registered options
@@ -550,7 +565,7 @@ def _select_options(pat):
return [k for k in keys if re.search(pat, k, re.I)]
-def _get_root(key):
+def _get_root(key: str) -> Tuple[Dict[str, Any], str]:
path = key.split(".")
cursor = _global_config
for p in path[:-1]:
@@ -558,14 +573,14 @@ def _get_root(key):
return cursor, path[-1]
-def _is_deprecated(key):
+def _is_deprecated(key: str) -> bool:
""" Returns True if the given option has been deprecated """
key = key.lower()
return key in _deprecated_options
-def _get_deprecated_option(key):
+def _get_deprecated_option(key: str):
"""
Retrieves the metadata for a deprecated option, if `key` is deprecated.
@@ -582,7 +597,7 @@ def _get_deprecated_option(key):
return d
-def _get_registered_option(key):
+def _get_registered_option(key: str):
"""
Retrieves the option metadata if `key` is a registered option.
@@ -593,7 +608,7 @@ def _get_registered_option(key):
return _registered_options.get(key)
-def _translate_key(key):
+def _translate_key(key: str) -> str:
"""
if key is deprecated and a replacement key defined, will return the
replacement key, otherwise returns `key` as - is
@@ -606,7 +621,7 @@ def _translate_key(key):
return key
-def _warn_if_deprecated(key):
+def _warn_if_deprecated(key: str) -> bool:
"""
Checks if `key` is a deprecated option and if so, prints a warning.
@@ -634,7 +649,7 @@ def _warn_if_deprecated(key):
return False
-def _build_option_description(k):
+def _build_option_description(k: str) -> str:
""" Builds a formatted description of a registered option and prints it """
o = _get_registered_option(k)
@@ -659,7 +674,7 @@ def _build_option_description(k):
return s
-def pp_options_list(keys, width=80, _print=False):
+def pp_options_list(keys: Iterable[str], width=80, _print: bool = False):
""" Builds a concise listing of available options, grouped by prefix """
from textwrap import wrap
@@ -697,6 +712,9 @@ def pp(name: str, ks: Iterable[str]) -> List[str]:
#
# helpers
+FuncType = Callable[..., Any]
+F = TypeVar("F", bound=FuncType)
+
@contextmanager
def config_prefix(prefix):
@@ -728,12 +746,12 @@ def config_prefix(prefix):
global register_option, get_option, set_option, reset_option
- def wrap(func):
- def inner(key, *args, **kwds):
+ def wrap(func: F) -> F:
+ def inner(key: str, *args, **kwds):
pkey = f"{prefix}.{key}"
return func(pkey, *args, **kwds)
- return inner
+ return cast(F, inner)
_register_option = register_option
_get_option = get_option
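The ``TypeVar``/``cast`` pair above is the usual way to type a decorator so the wrapped callable keeps its signature for type checkers. A standalone sketch of the pattern (``with_prefix`` is an illustrative name, not pandas API)::

    from typing import Any, Callable, TypeVar, cast

    F = TypeVar("F", bound=Callable[..., Any])


    def with_prefix(prefix: str) -> Callable[[F], F]:
        # Decorator factory: prepend `prefix` to the first argument.
        def wrap(func: F) -> F:
            def inner(key: str, *args, **kwds):
                return func(f"{prefix}.{key}", *args, **kwds)

            # cast() tells the checker that inner preserves func's signature.
            return cast(F, inner)

        return wrap


    @with_prefix("display")
    def get(key: str) -> str:
        return key

    get("width")  # "display.width"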
@@ -751,7 +769,7 @@ def inner(key, *args, **kwds):
# arg in register_option
-def is_type_factory(_type):
+def is_type_factory(_type: Type[Any]) -> Callable[[Any], None]:
"""
Parameters
@@ -765,14 +783,14 @@ def is_type_factory(_type):
"""
- def inner(x):
+ def inner(x) -> None:
if type(x) != _type:
raise ValueError(f"Value must have type '{_type}'")
return inner
-def is_instance_factory(_type):
+def is_instance_factory(_type) -> Callable[[Any], None]:
"""
Parameters
@@ -792,19 +810,19 @@ def is_instance_factory(_type):
else:
type_repr = f"'{_type}'"
- def inner(x):
+ def inner(x) -> None:
if not isinstance(x, _type):
raise ValueError(f"Value must be an instance of {type_repr}")
return inner
-def is_one_of_factory(legal_values):
+def is_one_of_factory(legal_values) -> Callable[[Any], None]:
callables = [c for c in legal_values if callable(c)]
legal_values = [c for c in legal_values if not callable(c)]
- def inner(x):
+ def inner(x) -> None:
if x not in legal_values:
if not any(c(x) for c in callables):
@@ -818,7 +836,7 @@ def inner(x):
return inner
-def is_nonnegative_int(value):
+def is_nonnegative_int(value: Optional[int]) -> None:
"""
Verify that value is None or a non-negative int.
@@ -853,7 +871,7 @@ def is_nonnegative_int(value):
is_text = is_instance_factory((str, bytes))
-def is_callable(obj):
+def is_callable(obj) -> bool:
"""
Parameters
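A usage sketch of the validator factories typed above (values are illustrative)::

    from pandas._config.config import (
        is_instance_factory,
        is_one_of_factory,
        is_type_factory,
    )

    is_int = is_type_factory(int)
    is_int(3)            # passes silently (returns None)
    # is_int(3.0)        # would raise ValueError: exact type match required

    is_text_like = is_instance_factory((str, bytes))
    is_text_like(b"ok")  # isinstance-based, so subclasses also pass

    # Legal values may mix literals and callables:
    check = is_one_of_factory(["warn", "raise", callable])
    check("warn")        # literal match
    check(print)         # accepted because callable(print) is True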
diff --git a/pandas/_config/display.py b/pandas/_config/display.py
index 067b7c503baab..ef319f4447565 100644
--- a/pandas/_config/display.py
+++ b/pandas/_config/display.py
@@ -1,6 +1,7 @@
"""
Unopinionated display configuration.
"""
+
import locale
import sys
@@ -11,7 +12,7 @@
_initial_defencoding = None
-def detect_console_encoding():
+def detect_console_encoding() -> str:
"""
Try to find the most capable encoding supported by the console.
slightly modified from the way IPython handles the same issue.
diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py
index dd1d4948aa6e3..0d68e78372d8a 100644
--- a/pandas/_config/localization.py
+++ b/pandas/_config/localization.py
@@ -12,7 +12,7 @@
@contextmanager
-def set_locale(new_locale, lc_var=locale.LC_ALL):
+def set_locale(new_locale, lc_var: int = locale.LC_ALL):
"""
Context manager for temporarily setting a locale.
@@ -44,7 +44,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL):
locale.setlocale(lc_var, current_locale)
-def can_set_locale(lc, lc_var=locale.LC_ALL):
+def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
"""
Check to see if we can set a locale, and subsequently get the locale,
without raising an Exception.
@@ -58,7 +58,7 @@ def can_set_locale(lc, lc_var=locale.LC_ALL):
Returns
-------
- is_valid : bool
+ bool
Whether the passed locale can be set
"""
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 7a2fc9dc7845a..dd1f38ce3a842 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -914,8 +914,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
ranks[argsorted[j]] = i + 1
elif tiebreak == TIEBREAK_FIRST:
if rank_t is object:
- raise ValueError('first not supported for '
- 'non-numeric data')
+ raise ValueError('first not supported for non-numeric data')
else:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = j + 1
@@ -971,8 +970,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
ranks[argsorted[j]] = i + 1
elif tiebreak == TIEBREAK_FIRST:
if rank_t is object:
- raise ValueError('first not supported for '
- 'non-numeric data')
+ raise ValueError('first not supported for non-numeric data')
else:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = j + 1
@@ -1137,8 +1135,7 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
ranks[i, argsorted[i, z]] = j + 1
elif tiebreak == TIEBREAK_FIRST:
if rank_t is object:
- raise ValueError('first not supported '
- 'for non-numeric data')
+ raise ValueError('first not supported for non-numeric data')
else:
for z in range(j - dups + 1, j + 1):
ranks[i, argsorted[i, z]] = z + 1
diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
index 420e08a3d68d4..995fabbedcb5d 100644
--- a/pandas/_libs/algos_take_helper.pxi.in
+++ b/pandas/_libs/algos_take_helper.pxi.in
@@ -116,7 +116,7 @@ def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
IF {{True if c_type_in == c_type_out != "object" else False}}:
cdef:
- {{c_type_out}} *v
+ const {{c_type_out}} *v
{{c_type_out}} *o
# GH#3130
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index abb8a6d388d26..93ea94f7b18fc 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -686,8 +686,7 @@ def _group_ohlc(floating[:, :] out,
raise ValueError('Output array must have 4 columns')
if K > 1:
- raise NotImplementedError("Argument 'values' must have only "
- "one dimension")
+ raise NotImplementedError("Argument 'values' must have only one dimension")
out[:] = np.nan
with nogil:
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
index d735890f7d07e..878da670b2f68 100644
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -51,8 +51,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k =