diff --git a/.circleci/config.yml b/.circleci/config.yml
index 50f6a116a6630..ba124533e953a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -48,7 +48,7 @@ jobs:
           name: Build aarch64 wheels
           no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
           command: |
-            pip3 install cibuildwheel==2.14.1
+            pip3 install cibuildwheel==2.15.0
             cibuildwheel --prerelease-pythons --output-dir wheelhouse
           environment:
             CIBW_BUILD: << parameters.cibw-build >>
@@ -92,5 +92,4 @@ workflows:
               only: /^v.*/
           matrix:
             parameters:
-              # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
-              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]
+              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64", "cp312-manylinux_aarch64"]
diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index f87aef5385898..3bd68c07dcbc3 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -33,7 +33,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -109,7 +109,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -143,7 +143,7 @@ jobs:
         run: docker image prune -f
 
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -164,7 +164,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 8715c5306a3b0..2182e89731990 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -27,7 +27,7 @@ jobs:
           - python
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: github/codeql-action/init@v2
         with:
           languages: ${{ matrix.language }}
diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml
index 2550d4de34a45..55dd733d25b50 100644
--- a/.github/workflows/comment-commands.yml
+++ b/.github/workflows/comment-commands.yml
@@ -51,7 +51,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
index e05f12ac6416a..deaf2be0a0423 100644
--- a/.github/workflows/docbuild-and-upload.yml
+++ b/.github/workflows/docbuild-and-upload.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
diff --git a/.github/workflows/package-checks.yml b/.github/workflows/package-checks.yml
index 04abcf4ce8816..64a94d7fde5a9 100644
--- a/.github/workflows/package-checks.yml
+++ b/.github/workflows/package-checks.yml
@@ -34,7 +34,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -62,7 +62,7 @@ jobs:
       cancel-in-progress: true
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 6410f2edd6175..f2b426269098b 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -136,7 +136,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -194,7 +194,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -330,7 +330,7 @@ jobs:
       PYTEST_TARGET: pandas
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 5f541f1bae1fd..83d14b51092e6 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -48,7 +48,7 @@ jobs:
       sdist_file: ${{ steps.save-path.outputs.sdist_name }}
     steps:
       - name: Checkout pandas
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -97,14 +97,13 @@ jobs:
         - [macos-12, macosx_*]
         - [windows-2022, win_amd64]
         # TODO: support PyPy?
-        # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
-        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
+        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]]
     env:
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
     steps:
       - name: Checkout pandas
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -150,8 +149,10 @@ jobs:
         uses: mamba-org/setup-micromamba@v1
         with:
           environment-name: wheel-env
+          # Use a fixed Python, since we might have an unreleased Python not
+          # yet present on conda-forge
           create-args: >-
-            python=${{ matrix.python[1] }}
+            python=3.11
             anaconda-client
             wheel
           cache-downloads: true
@@ -167,12 +168,13 @@ jobs:
         shell: pwsh
         run: |
           $TST_CMD = @"
-          python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
-          python -m pip install --find-links=pandas\wheelhouse --no-index pandas;
+          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
+          python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
           python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
           "@
-          docker pull python:${{ matrix.python[1] }}-windowsservercore
-          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD
+          # add rc to the end of the image name if the Python version is unreleased
+          docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
+          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
 
       - uses: actions/upload-artifact@v3
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9f9bcd78c07b0..c01bf65818167 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
     hooks:
       - id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.285
+    rev: v0.0.287
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -34,7 +34,7 @@ repos:
         alias: ruff-selected-autofixes
         args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
 -   repo: https://github.com/jendrikseipp/vulture
-    rev: 'v2.7'
+    rev: 'v2.9.1'
     hooks:
       - id: vulture
         entry: python scripts/run_vulture.py
@@ -84,7 +84,7 @@ repos:
             '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
         ]
 -   repo: https://github.com/pylint-dev/pylint
-    rev: v3.0.0a6
+    rev: v3.0.0a7
     hooks:
     -   id: pylint
         stages: [manual]
@@ -124,7 +124,7 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.6.7
+    rev: v0.6.8
     hooks:
     - id: sphinx-lint
 -   repo: local
diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
index 09c4acc0ab309..0229cf15fbfb8 100644
--- a/asv_bench/benchmarks/array.py
+++ b/asv_bench/benchmarks/array.py
@@ -90,7 +90,7 @@ def time_setitem(self, multiple_chunks):
             self.array[i] = "foo"
 
     def time_setitem_list(self, multiple_chunks):
-        indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
+        indexer = list(range(50)) + list(range(-1000, 0, 50))
         self.array[indexer] = ["foo"] * len(indexer)
 
     def time_setitem_slice(self, multiple_chunks):
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 54bcdb0fa2843..04ac47a892a22 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -360,14 +360,14 @@ class MergeCategoricals:
     def setup(self):
         self.left_object = DataFrame(
             {
-                "X": np.random.choice(range(0, 10), size=(10000,)),
+                "X": np.random.choice(range(10), size=(10000,)),
                 "Y": np.random.choice(["one", "two", "three"], size=(10000,)),
             }
         )
 
         self.right_object = DataFrame(
             {
-                "X": np.random.choice(range(0, 10), size=(10000,)),
+                "X": np.random.choice(range(10), size=(10000,)),
                 "Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)),
             }
         )
diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index 2190136220c6c..927003b13d6be 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml
index cf85345cb0cc2..00df41cce3bae 100644
--- a/ci/deps/actions-311-downstream_compat.yaml
+++ b/ci/deps/actions-311-downstream_compat.yaml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml
index 3c1630714a041..d50ea20da1e0c 100644
--- a/ci/deps/actions-311.yaml
+++ b/ci/deps/actions-311.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   # - pytables>=3.7.0, 3.8.0 is first version that supports 3.11
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml
index b1cea49e22d15..10862630bd596 100644
--- a/ci/deps/actions-39-minimum_versions.yaml
+++ b/ci/deps/actions-39-minimum_versions.yaml
@@ -48,6 +48,7 @@ dependencies:
   - pymysql=1.0.2
   - pyreadstat=1.1.5
   - pytables=3.7.0
+  - python-calamine=0.1.6
   - pyxlsb=1.0.9
   - s3fs=2022.05.0
   - scipy=1.8.1
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index b8a119ece4b03..904b55a813a9f 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml
index 71686837451b4..4060cea73e7f6 100644
--- a/ci/deps/circle-310-arm64.yaml
+++ b/ci/deps/circle-310-arm64.yaml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   # - pyreadstat>=1.1.5 not available on ARM
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/doc/cheatsheet/README.md b/doc/cheatsheet/README.md
new file mode 100644
index 0000000000000..6c33de104ed90
--- /dev/null
+++ b/doc/cheatsheet/README.md
@@ -0,0 +1,22 @@
+# Pandas Cheat Sheet
+
+The Pandas Cheat Sheet was created using Microsoft PowerPoint 2013.
+To create the PDF version, within PowerPoint, simply do a "Save As"
+and pick "PDF" as the format.
+
+This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](https://www.princetonoptimization.com/), was inspired by the [RStudio Data Wrangling Cheatsheet](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf).
+
+| Topic                  | PDF                                                                                                                                                                                                                                     | PPT                                                                                                                                                                                                                                               |
+|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Pandas_Cheat_Sheet     | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a>    | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a>     |
+| Pandas_Cheat_Sheet_JA  | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
+
+
+**Alternative**
+
+Alternatively, if you want to complement your learning, you can use the Pandas Cheat sheets
+developed by [DataCamp](https://www.datacamp.com/) in "PDF", "Google Colab" and "Streamlit" formats.
+
+| Topic       | PDF                                                                                                                                                                                                                                  | Streamlit                                                                                                                                                        | Google Colab                                                                                                                                                                                                                                   |
+|-------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Pandas      | <a href="https://github.com/fralfaro/DS-Cheat-Sheets/blob/main/docs/files/pandas_cs.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a>       | <a href="https://ds-cheat-sheets-pandas.streamlit.app/" target="_parent"><img src="https://static.streamlit.io/badges/streamlit_badge_black_white.svg"/></a>     | <a href="https://colab.research.google.com/github/fralfaro/DS-Cheat-Sheets/blob/main/docs/examples/pandas/pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>         |
diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt
deleted file mode 100644
index c57da38b31777..0000000000000
--- a/doc/cheatsheet/README.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013.
-To create the PDF version, within Powerpoint, simply do a "Save As"
-and pick "PDF" as the format.
-
-This cheat sheet was inspired by the RStudio Data Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2].
-
-[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf
-[2]: https://www.princetonoptimization.com/
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index ae7c9d4ea9c62..2c0787397e047 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -281,6 +281,7 @@ xlrd                      2.0.1              excel           Reading Excel
 xlsxwriter                3.0.3              excel           Writing Excel
 openpyxl                  3.0.10             excel           Reading / writing for xlsx files
 pyxlsb                    1.0.9              excel           Reading for xlsb files
+python-calamine           0.1.6              excel           Reading for xls/xlsx/xlsb/ods files
 ========================= ================== =============== =============================================================
 
 HTML
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
index 2dcc8b0abe3b8..caaff3557ae40 100644
--- a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -106,9 +106,9 @@ between square brackets ``[]``.
     </ul>
 
 .. note::
-    If you are familiar to Python
+    If you are familiar with Python
     :ref:`dictionaries <python:tut-dictionaries>`, the selection of a
-    single column is very similar to selection of dictionary values based on
+    single column is very similar to the selection of dictionary values based on
     the key.
 
 You can create a ``Series`` from scratch as well:
diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
index c0d2a14507383..002e88533ab93 100644
--- a/doc/source/user_guide/cookbook.rst
+++ b/doc/source/user_guide/cookbook.rst
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
    df
 
    # List the size of the animals with the highest weight.
-   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
+   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
 
 `Using get_group
 <https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
        return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
 
 
-   expected_df = gb.apply(GrowUp)
+   expected_df = gb.apply(GrowUp, include_groups=False)
    expected_df
 
 `Expanding apply
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index c28123cec4491..5dd14e243fbb3 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -420,6 +420,12 @@ This is mainly syntactic sugar for the alternative, which is much more verbose:
 Additionally, this method avoids recomputing the internal grouping information
 derived from the passed key.
 
+You can also include the grouping columns if you want to operate on them.
+
+.. ipython:: python
+
+   grouped[["A", "B"]].sum()
+
 .. _groupby.iterating-label:
 
 Iterating through groups
@@ -1053,7 +1059,7 @@ missing values with the ``ffill()`` method.
    ).set_index("date")
    df_re
 
-   df_re.groupby("group").resample("1D").ffill()
+   df_re.groupby("group").resample("1D", include_groups=False).ffill()
 
 .. _groupby.filter:
 
@@ -1219,13 +1225,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare
 
 .. ipython:: python
 
-    df.groupby("A", group_keys=True).apply(lambda x: x)
+    df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
 
 with
 
 .. ipython:: python
 
-    df.groupby("A", group_keys=False).apply(lambda x: x)
+    df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
 
 
 Numba Accelerated Routines
@@ -1709,7 +1715,7 @@ column index name will be used as the name of the inserted column:
        result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
        return pd.Series(result, name="metrics")
 
-   result = df.groupby("a").apply(compute_metrics)
+   result = df.groupby("a").apply(compute_metrics, include_groups=False)
 
    result
 
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 17f52c98a741b..3408da439d0ef 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1811,8 +1811,8 @@ Writing JSON
 A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
 with optional parameters:
 
-* ``path_or_buf`` : the pathname or buffer to write the output
-  This can be ``None`` in which case a JSON string is returned
+* ``path_or_buf`` : the pathname or buffer to write the output.
+  This can be ``None`` in which case a JSON string is returned.
 * ``orient`` :
 
   ``Series``:
@@ -3454,7 +3454,8 @@ Excel files
 The :func:`~pandas.read_excel` method can read Excel 2007+ (``.xlsx``) files
 using the ``openpyxl`` Python module. Excel 2003 (``.xls``) files
 can be read using ``xlrd``. Binary Excel (``.xlsb``)
-files can be read using ``pyxlsb``.
+files can be read using ``pyxlsb``. All formats can be read
+using the :ref:`calamine<io.calamine>` engine.
 The :meth:`~DataFrame.to_excel` instance method is used for
 saving a ``DataFrame`` to Excel.  Generally the semantics are
 similar to working with :ref:`csv<io.read_csv_table>` data.
@@ -3495,6 +3496,9 @@ using internally.
 
 * For the engine odf, pandas is using :func:`odf.opendocument.load` to read in (``.ods``) files.
 
+* For the engine calamine, pandas is using :func:`python_calamine.load_workbook`
+  to read in (``.xlsx``), (``.xlsm``), (``.xls``), (``.xlsb``), (``.ods``) files.
+
 .. code-block:: python
 
    # Returns a DataFrame
@@ -3936,7 +3940,8 @@ The :func:`~pandas.read_excel` method can also read binary Excel files
 using the ``pyxlsb`` module. The semantics and features for reading
 binary Excel files mostly match what can be done for `Excel files`_ using
 ``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types
-in files and will return floats instead.
+in files and will return floats instead (you can use :ref:`calamine<io.calamine>`
+if you need to recognize datetime types).
 
 .. code-block:: python
 
@@ -3948,6 +3953,20 @@ in files and will return floats instead.
    Currently pandas only supports *reading* binary Excel files. Writing
    is not implemented.
 
+.. _io.calamine:
+
+Calamine (Excel and ODS files)
+------------------------------
+
+The :func:`~pandas.read_excel` method can read Excel files (``.xlsx``, ``.xlsm``, ``.xls``, ``.xlsb``)
+and OpenDocument spreadsheets (``.ods``) using the ``python-calamine`` module.
+This module is a binding for the Rust library `calamine <https://crates.io/crates/calamine>`__
+and is faster than other engines in most cases. The optional dependency 'python-calamine' needs to be installed.
+
+.. code-block:: python
+
+   # Returns a DataFrame
+   pd.read_excel("path_to_file.xlsb", engine="calamine")
 
 .. _io.clipboard:
 
diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst
index 3425986a37743..422efc1b36946 100644
--- a/doc/source/whatsnew/v0.10.0.rst
+++ b/doc/source/whatsnew/v0.10.0.rst
@@ -180,19 +180,36 @@ labeled the aggregated group with the end of the interval: the next day).
   DataFrame constructor with no columns specified. The v0.9.0 behavior (names
   ``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
 
-.. ipython:: python
-   :okexcept:
-
-    import io
-
-    data = """
-    a,b,c
-    1,Yes,2
-    3,No,4
-    """
-    print(data)
-    pd.read_csv(io.StringIO(data), header=None)
-    pd.read_csv(io.StringIO(data), header=None, prefix="X")
+.. code-block:: ipython
+
+    In [6]: import io
+
+    In [7]: data = """
+      ...: a,b,c
+      ...: 1,Yes,2
+      ...: 3,No,4
+      ...: """
+      ...:
+
+    In [8]: print(data)
+
+        a,b,c
+        1,Yes,2
+        3,No,4
+
+    In [9]: pd.read_csv(io.StringIO(data), header=None)
+    Out[9]:
+           0    1  2
+    0      a    b  c
+    1      1  Yes  2
+    2      3   No  4
+
+    In [10]: pd.read_csv(io.StringIO(data), header=None, prefix="X")
+    Out[10]:
+            X0   X1 X2
+    0       a    b  c
+    1       1  Yes  2
+    2       3   No  4
 
 - Values like ``'Yes'`` and ``'No'`` are not interpreted as boolean by default,
   though this can be controlled by new ``true_values`` and ``false_values``
diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst
index 92c37243b7e81..9c537b3a48c74 100644
--- a/doc/source/whatsnew/v0.14.0.rst
+++ b/doc/source/whatsnew/v0.14.0.rst
@@ -328,13 +328,24 @@ More consistent behavior for some groupby methods:
 
 - groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:
 
-  .. ipython:: python
+  .. code-block:: ipython
 
-     df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
-     g = df.groupby('A')
-     g.head(1)  # filters DataFrame
+     In [1]: df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
 
-     g.apply(lambda x: x.head(1))  # used to simply fall-through
+     In [2]: g = df.groupby('A')
+
+     In [3]: g.head(1)  # filters DataFrame
+     Out[3]:
+        A  B
+     0  1  2
+     2  5  6
+
+     In [4]: g.apply(lambda x: x.head(1))  # used to simply fall-through
+     Out[4]:
+          A  B
+     A
+     1 0  1  2
+     5 2  5  6
 
 - groupby head and tail respect column selection:
 
diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst
index bb7beef449d93..acc5409b86d09 100644
--- a/doc/source/whatsnew/v0.15.2.rst
+++ b/doc/source/whatsnew/v0.15.2.rst
@@ -24,25 +24,61 @@ API changes
 - Indexing in ``MultiIndex`` beyond lex-sort depth is now supported, though
   a lexically sorted index will have a better performance. (:issue:`2646`)
 
-  .. ipython:: python
-    :okexcept:
-    :okwarning:
+  .. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({'jim':[0, 0, 1, 1],
+       ...:                    'joe':['x', 'x', 'z', 'y'],
+       ...:                    'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
+       ...:
 
-    df = pd.DataFrame({'jim':[0, 0, 1, 1],
-                       'joe':['x', 'x', 'z', 'y'],
-                       'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
-    df
-    df.index.lexsort_depth
+    In [2]: df
+    Out[2]:
+                jolie
+    jim joe
+    0   x    0.126970
+        x    0.966718
+    1   z    0.260476
+        y    0.897237
+
+    [4 rows x 1 columns]
+
+    In [3]: df.index.lexsort_depth
+    Out[3]: 1
 
     # in prior versions this would raise a KeyError
     # will now show a PerformanceWarning
-    df.loc[(1, 'z')]
+    In [4]: df.loc[(1, 'z')]
+    Out[4]:
+                jolie
+    jim joe
+    1   z    0.260476
+
+    [1 rows x 1 columns]
 
     # lexically sorting
-    df2 = df.sort_index()
-    df2
-    df2.index.lexsort_depth
-    df2.loc[(1,'z')]
+    In [5]: df2 = df.sort_index()
+
+    In [6]: df2
+    Out[6]:
+                jolie
+    jim joe
+    0   x    0.126970
+        x    0.966718
+    1   y    0.897237
+        z    0.260476
+
+    [4 rows x 1 columns]
+
+    In [7]: df2.index.lexsort_depth
+    Out[7]: 2
+
+    In [8]: df2.loc[(1,'z')]
+    Out[8]:
+                jolie
+    jim joe
+    1   z    0.260476
+
+    [1 rows x 1 columns]
 
 - Bug in unique of Series with ``category`` dtype, which returned all categories regardless
   whether they were "used" or not (see :issue:`8559` for the discussion).
diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst
index 7d9008fdbdecd..ee6a60144bc35 100644
--- a/doc/source/whatsnew/v0.18.1.rst
+++ b/doc/source/whatsnew/v0.18.1.rst
@@ -77,9 +77,52 @@ Previously you would have to do this to get a rolling window mean per-group:
    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
    df
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   In [1]: df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   Out[1]:
+   A
+   1  0      NaN
+      1      NaN
+      2      NaN
+      3      1.5
+      4      2.5
+      5      3.5
+      6      4.5
+      7      5.5
+      8      6.5
+      9      7.5
+      10     8.5
+      11     9.5
+      12    10.5
+      13    11.5
+      14    12.5
+      15    13.5
+      16    14.5
+      17    15.5
+      18    16.5
+      19    17.5
+   2  20     NaN
+      21     NaN
+      22     NaN
+      23    21.5
+      24    22.5
+      25    23.5
+      26    24.5
+      27    25.5
+      28    26.5
+      29    27.5
+      30    28.5
+      31    29.5
+   3  32     NaN
+      33     NaN
+      34     NaN
+      35    33.5
+      36    34.5
+      37    35.5
+      38    36.5
+      39    37.5
+   Name: B, dtype: float64
 
 Now you can do:
 
@@ -101,15 +144,53 @@ For ``.resample(..)`` type of operations, previously you would have to:
 
    df
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   In [1]: df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8
 
 Now you can do:
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("group").resample("1D").ffill()
+   In [1]: df.groupby("group").resample("1D").ffill()
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8
 
 .. _whatsnew_0181.enhancements.method_chain:
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 4ad5f4e7b5c3d..b013d03b2d68c 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -286,6 +286,7 @@ value. (:issue:`17054`)
 
 .. ipython:: python
 
+    from io import StringIO
     result = pd.read_html(StringIO("""
       <table>
         <thead>
diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst
index 1ef60960a51c3..6d5da7cdff3b3 100644
--- a/doc/source/whatsnew/v2.1.1.rst
+++ b/doc/source/whatsnew/v2.1.1.rst
@@ -13,15 +13,30 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- Fixed regression in :func:`concat` when :class:`DataFrame` 's have two different extension dtypes (:issue:`54848`)
+- Fixed regression in :func:`merge` when merging over a PyArrow string index (:issue:`54894`)
 - Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtypes`` is a dict for ``engine="python"`` (:issue:`54868`)
+- Fixed regression in :func:`read_csv` when ``delim_whitespace`` is True (:issue:`54918`, :issue:`54931`)
+- Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`)
 - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
+- Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``items`` (:issue:`54980`)
+- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
+- Fixed regression in :meth:`DataFrameGroupBy.agg` when aggregating a DataFrame with duplicate column names using a dictionary (:issue:`55006`)
+- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
+- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
+- Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)
+- Fixed regression in :meth:`Series.value_counts` raising for numeric data if ``bins`` was specified (:issue:`54857`)
+- Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_211.bug_fixes:
 
 Bug fixes
 ~~~~~~~~~
--
+- Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
+- Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
+- Fixed bug in :meth:`Series.dt.tz` with :class:`ArrowDtype` where a string was returned instead of a ``tzinfo`` object (:issue:`55003`)
+- Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_211.other:
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index c6d8ad8be62c4..b79d5341d226a 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -14,10 +14,27 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
-.. _whatsnew_220.enhancements.enhancement1:
+.. _whatsnew_220.enhancements.calamine:
+
+Calamine engine for :func:`read_excel`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``calamine`` engine was added to :func:`read_excel`.
+It uses ``python-calamine``, which provides Python bindings for the Rust library `calamine <https://crates.io/crates/calamine>`__.
+This engine supports Excel files (``.xlsx``, ``.xlsm``, ``.xls``, ``.xlsb``) and OpenDocument spreadsheets (``.ods``) (:issue:`50395`).
+
+There are two advantages of this engine:
+
+1. Calamine is often faster than other engines: some benchmarks show it reading up to 5x faster than 'openpyxl', 20x faster than 'odf', 4x faster than 'pyxlsb', and 1.5x faster than 'xlrd'.
+   However, 'openpyxl' and 'pyxlsb' are faster when reading only a few rows from large files, because they iterate over rows lazily.
+2. Calamine supports the recognition of datetime in ``.xlsb`` files, unlike 'pyxlsb', which is the only other engine in pandas that can read ``.xlsb`` files.
+
+.. code-block:: python
+
+   pd.read_excel("path_to_file.xlsb", engine="calamine")
 
-enhancement1
-^^^^^^^^^^^^
+
+For more, see :ref:`io.calamine` in the user guide on IO tools.
 
 .. _whatsnew_220.enhancements.enhancement2:
 
@@ -28,7 +45,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
--
+- :meth:`DataFrame.apply` now supports using numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -147,19 +164,22 @@ Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
 - Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
 - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
+- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
 - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
 - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.performance:
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
-- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`, :issue:`54883`)
+- Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
+- Performance improvement in :meth:`DataFrame.to_dict` when converting a DataFrame to a dictionary (:issue:`50990`)
+- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
+- Performance improvement in :meth:`Index.difference` (:issue:`55108`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
 -
 
@@ -169,10 +189,12 @@ Performance improvements
 Bug fixes
 ~~~~~~~~~
 - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
+- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
+- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
 
 Categorical
 ^^^^^^^^^^^
--
+- :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`)
 -
 
 Datetimelike
@@ -227,7 +249,9 @@ MultiIndex
 
 I/O
 ^^^
+- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
 - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
+- Bug in :meth:`DataFrame.to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string values (:issue:`54994`)
 
 Period
 ^^^^^^
@@ -246,8 +270,8 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
--
--
+- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
+- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
 
 Sparse
 ^^^^^^
diff --git a/environment.yml b/environment.yml
index 1a9dffb55bca7..8deae839f5408 100644
--- a/environment.yml
+++ b/environment.yml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
@@ -105,7 +106,7 @@ dependencies:
   - ipykernel
 
   # web
-  - jinja2  # in optional dependencies, but documented here as needed
+  # - jinja2  # already listed in optional dependencies, but documented here for reference
   - markdown
   - feedparser
   - pyyaml
diff --git a/generate_version.py b/generate_version.py
index 46e9f52bfc5de..06e38ce0fd978 100644
--- a/generate_version.py
+++ b/generate_version.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # Note: This file has to live next to setup.py or versioneer will not work
 import argparse
 import os
diff --git a/meson.build b/meson.build
index 09a1494135af4..e0e533ffade97 100644
--- a/meson.build
+++ b/meson.build
@@ -2,19 +2,17 @@
 project(
     'pandas',
     'c', 'cpp', 'cython',
-    version: run_command(['python', 'generate_version.py', '--print'], check: true).stdout().strip(),
+    version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(),
     license: 'BSD-3',
     meson_version: '>=1.0.1',
     default_options: [
         'buildtype=release',
-        # TODO: Reactivate werror, some warnings on Windows
-        #'werror=true',
         'c_std=c99'
     ]
 )
 
 fs = import('fs')
-py = import('python').find_installation()
+py = import('python').find_installation(pure: false)
 tempita = files('generate_pxi.py')
 versioneer = files('generate_version.py')
 
@@ -30,7 +28,7 @@ add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c
 
 
 if fs.exists('_version_meson.py')
-    py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
+    py.install_sources('_version_meson.py', subdir: 'pandas')
 else
     custom_target('write_version_file',
         output: '_version_meson.py',
@@ -40,11 +38,15 @@ else
         build_by_default: true,
         build_always_stale: true,
         install: true,
-        install_dir: py.get_install_dir(pure: false) / 'pandas'
+        install_dir: py.get_install_dir() / 'pandas'
     )
     meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
 endif
 
 # Needed by pandas.test() when it looks for the pytest ini options
-py.install_sources('pyproject.toml', pure: false, subdir: 'pandas')
+py.install_sources(
+    'pyproject.toml',
+    subdir: 'pandas'
+)
+
 subdir('pandas')
diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build
index f302c649bc7bd..1cf2c4343d844 100644
--- a/pandas/_libs/meson.build
+++ b/pandas/_libs/meson.build
@@ -69,7 +69,8 @@ libs_sources = {
     'index': {'sources': ['index.pyx', _index_class_helper]},
     'indexing': {'sources': ['indexing.pyx']},
     'internals': {'sources': ['internals.pyx']},
-    'interval': {'sources': ['interval.pyx', _intervaltree_helper]},
+    'interval': {'sources': ['interval.pyx', _intervaltree_helper],
+                 'deps': _khash_primitive_helper_dep},
     'join': {'sources': ['join.pyx', _khash_primitive_helper],
              'deps': _khash_primitive_helper_dep},
     'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},
@@ -113,8 +114,9 @@ foreach ext_name, ext_dict : libs_sources
     )
 endforeach
 
-py.install_sources('__init__.py',
-                    pure: false,
-                    subdir: 'pandas/_libs')
+py.install_sources(
+    '__init__.py',
+    subdir: 'pandas/_libs'
+)
 
 subdir('window')
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 6d66e21ce49f5..5f51f48b43ca9 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -6,7 +6,6 @@ from csv import (
     QUOTE_NONE,
     QUOTE_NONNUMERIC,
 )
-import sys
 import time
 import warnings
 
@@ -880,9 +879,15 @@ cdef class TextReader:
 
     cdef _check_tokenize_status(self, int status):
         if self.parser.warn_msg != NULL:
-            print(PyUnicode_DecodeUTF8(
-                self.parser.warn_msg, strlen(self.parser.warn_msg),
-                self.encoding_errors), file=sys.stderr)
+            warnings.warn(
+                PyUnicode_DecodeUTF8(
+                    self.parser.warn_msg,
+                    strlen(self.parser.warn_msg),
+                    self.encoding_errors
+                ),
+                ParserWarning,
+                stacklevel=find_stack_level()
+            )
             free(self.parser.warn_msg)
             self.parser.warn_msg = NULL
 
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index abd3fb9e1fef3..ce8a38df172ef 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -664,7 +664,8 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
     ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
 
 // applied when in a field
-#define IS_DELIMITER(c) ((c == delimiter) || (delim_whitespace && isblank(c)))
+#define IS_DELIMITER(c) \
+    ((!delim_whitespace && c == delimiter) || (delim_whitespace && isblank(c)))
 
 #define _TOKEN_CLEANUP()                                                \
     self->stream_len = slen;                                            \
diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
index e3e710ce1b876..942bd0b518144 100644
--- a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
+++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
@@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
 #include <float.h>
 #include <locale.h>
 #include <math.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
 
 void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
     char *wstr;
-    JSUINT64 uvalue = (value < 0) ? -value : value;
+    JSUINT64 uvalue;
+    if (value == INT64_MIN) {
+      uvalue = INT64_MAX + UINT64_C(1);
+    } else {
+      uvalue = (value < 0) ? -value : value;
+    }
 
     wstr = enc->offset;
     // Conversion. Number is reversed.
diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build
index 14d2eef46da20..167695b84514c 100644
--- a/pandas/_libs/tslibs/meson.build
+++ b/pandas/_libs/tslibs/meson.build
@@ -31,6 +31,7 @@ foreach ext_name, ext_dict : tslibs_sources
     )
 endforeach
 
-py.install_sources('__init__.py',
-                    pure: false,
-                    subdir: 'pandas/_libs/tslibs')
+py.install_sources(
+    '__init__.py',
+    subdir: 'pandas/_libs/tslibs'
+)
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 7b2ee68c73ad2..c3ee68e14a8d4 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -1,4 +1,3 @@
-cimport cython
 from cpython.datetime cimport (
     PyDateTime_CheckExact,
     PyDateTime_DATE_GET_HOUR,
@@ -18,6 +17,7 @@ from cpython.object cimport (
     Py_LT,
     Py_NE,
 )
+from libc.stdint cimport INT64_MAX
 
 import_datetime()
 PandasDateTime_IMPORT
@@ -545,7 +545,6 @@ cdef ndarray astype_round_check(
     return iresult
 
 
-@cython.overflowcheck(True)
 cdef int64_t get_conversion_factor(
     NPY_DATETIMEUNIT from_unit,
     NPY_DATETIMEUNIT to_unit
@@ -553,6 +552,7 @@ cdef int64_t get_conversion_factor(
     """
     Find the factor by which we need to multiply to convert from from_unit to to_unit.
     """
+    cdef int64_t value, overflow_limit, factor
     if (
         from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
         or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
@@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
         return 1
 
     if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
-        return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        factor = 7
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
-        return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        factor = 24
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        factor = 1000
     else:
         raise ValueError("Converting from M or Y units is not supported.")
 
+    overflow_limit = INT64_MAX // factor
+    if value > overflow_limit or value < -overflow_limit:
+        raise OverflowError("result would overflow")
+
+    return factor * value
+
 
 cdef int64_t convert_reso(
     int64_t value,
@@ -595,7 +611,7 @@ cdef int64_t convert_reso(
     bint round_ok,
 ) except? -1:
     cdef:
-        int64_t res_value, mult, div, mod
+        int64_t res_value, mult, div, mod, overflow_limit
 
     if from_reso == to_reso:
         return value
@@ -624,9 +640,12 @@ cdef int64_t convert_reso(
     else:
         # e.g. ns -> us, risk of overflow, but no risk of lossy rounding
         mult = get_conversion_factor(from_reso, to_reso)
-        with cython.overflowcheck(True):
+        overflow_limit = INT64_MAX // mult
+        if value > overflow_limit or value < -overflow_limit:
             # Note: caller is responsible for re-raising as OutOfBoundsTimedelta
-            res_value = value * mult
+            raise OverflowError("result would overflow")
+
+        res_value = value * mult
 
     return res_value
 
diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi
index 8826757e31c32..c85865fea8fd0 100644
--- a/pandas/_libs/tslibs/period.pyi
+++ b/pandas/_libs/tslibs/period.pyi
@@ -89,7 +89,7 @@ class Period(PeriodMixin):
     @classmethod
     def _from_ordinal(cls, ordinal: int, freq) -> Period: ...
     @classmethod
-    def now(cls, freq: BaseOffset = ...) -> Period: ...
+    def now(cls, freq: Frequency = ...) -> Period: ...
     def strftime(self, fmt: str) -> str: ...
     def to_timestamp(
         self,
diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi
index aba9b25b23154..6d993722ce1d4 100644
--- a/pandas/_libs/tslibs/timedeltas.pyi
+++ b/pandas/_libs/tslibs/timedeltas.pyi
@@ -14,6 +14,7 @@ from pandas._libs.tslibs import (
     Tick,
 )
 from pandas._typing import (
+    Frequency,
     Self,
     npt,
 )
@@ -117,9 +118,9 @@ class Timedelta(timedelta):
     @property
     def asm8(self) -> np.timedelta64: ...
     # TODO: round/floor/ceil could return NaT?
-    def round(self, freq: str) -> Self: ...
-    def floor(self, freq: str) -> Self: ...
-    def ceil(self, freq: str) -> Self: ...
+    def round(self, freq: Frequency) -> Self: ...
+    def floor(self, freq: Frequency) -> Self: ...
+    def ceil(self, freq: Frequency) -> Self: ...
     @property
     def resolution_string(self) -> str: ...
     def __add__(self, other: timedelta) -> Timedelta: ...
diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi
index 36ae2d6d892f1..e23f01b800874 100644
--- a/pandas/_libs/tslibs/timestamps.pyi
+++ b/pandas/_libs/tslibs/timestamps.pyi
@@ -8,6 +8,8 @@ from datetime import (
 from time import struct_time
 from typing import (
     ClassVar,
+    Literal,
+    TypeAlias,
     TypeVar,
     overload,
 )
@@ -27,6 +29,7 @@ from pandas._typing import (
 )
 
 _DatetimeT = TypeVar("_DatetimeT", bound=datetime)
+_TimeZones: TypeAlias = str | _tzinfo | None | int
 
 def integer_op_not_supported(obj: object) -> TypeError: ...
 
@@ -51,13 +54,13 @@ class Timestamp(datetime):
         tzinfo: _tzinfo | None = ...,
         *,
         nanosecond: int | None = ...,
-        tz: str | _tzinfo | None | int = ...,
+        tz: _TimeZones = ...,
         unit: str | int | None = ...,
         fold: int | None = ...,
     ) -> _DatetimeT | NaTType: ...
     @classmethod
     def _from_value_and_reso(
-        cls, value: int, reso: int, tz: _tzinfo | None
+        cls, value: int, reso: int, tz: _TimeZones
     ) -> Timestamp: ...
     @property
     def value(self) -> int: ...  # np.int64
@@ -84,19 +87,19 @@ class Timestamp(datetime):
     @property
     def fold(self) -> int: ...
     @classmethod
-    def fromtimestamp(cls, ts: float, tz: _tzinfo | None = ...) -> Self: ...
+    def fromtimestamp(cls, ts: float, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def utcfromtimestamp(cls, ts: float) -> Self: ...
     @classmethod
-    def today(cls, tz: _tzinfo | str | None = ...) -> Self: ...
+    def today(cls, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def fromordinal(
         cls,
         ordinal: int,
-        tz: _tzinfo | str | None = ...,
+        tz: _TimeZones = ...,
     ) -> Self: ...
     @classmethod
-    def now(cls, tz: _tzinfo | str | None = ...) -> Self: ...
+    def now(cls, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def utcnow(cls) -> Self: ...
     # error: Signature of "combine" incompatible with supertype "datetime"
@@ -131,7 +134,7 @@ class Timestamp(datetime):
         fold: int | None = ...,
     ) -> Self: ...
     # LSP violation: datetime.datetime.astimezone has a default value for tz
-    def astimezone(self, tz: _tzinfo | None) -> Self: ...  # type: ignore[override]
+    def astimezone(self, tz: _TimeZones) -> Self: ...  # type: ignore[override]
     def ctime(self) -> str: ...
     def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
     @classmethod
@@ -184,12 +187,12 @@ class Timestamp(datetime):
     def to_julian_date(self) -> np.float64: ...
     @property
     def asm8(self) -> np.datetime64: ...
-    def tz_convert(self, tz: _tzinfo | str | None) -> Self: ...
+    def tz_convert(self, tz: _TimeZones) -> Self: ...
     # TODO: could return NaT?
     def tz_localize(
         self,
-        tz: _tzinfo | str | None,
-        ambiguous: str = ...,
+        tz: _TimeZones,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def normalize(self) -> Self: ...
@@ -197,19 +200,19 @@ class Timestamp(datetime):
     def round(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def floor(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def ceil(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def day_name(self, locale: str | None = ...) -> str: ...
diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx
index 02934346130a5..7b306c5e681e0 100644
--- a/pandas/_libs/window/indexers.pyx
+++ b/pandas/_libs/window/indexers.pyx
@@ -138,6 +138,8 @@ def calculate_variable_window_bounds(
                         break
             # end bound is previous end
             # or current index
+            elif index[end[i - 1]] == end_bound and not right_closed:
+                end[i] = end[i - 1] + 1
             elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0:
                 end[i] = i + 1
             else:
diff --git a/pandas/_typing.py b/pandas/_typing.py
index 743815b91210d..c2bbebfbe2857 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -112,7 +112,7 @@
 # Cannot use `Sequence` because a string is a sequence, and we don't want to
 # accept that.  Could refine if https://github.com/python/typing/issues/256 is
 # resolved to differentiate between Sequence[str] and str
-ListLike = Union[AnyArrayLike, list, range]
+ListLike = Union[AnyArrayLike, list, tuple, range]
 
 # scalars
 
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index be0a762642e46..684e9dccdc0f9 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -30,6 +30,7 @@
     pa_version_under9p0,
     pa_version_under11p0,
     pa_version_under13p0,
+    pa_version_under14p0,
 )
 
 if TYPE_CHECKING:
@@ -186,6 +187,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "pa_version_under9p0",
     "pa_version_under11p0",
     "pa_version_under13p0",
+    "pa_version_under14p0",
     "IS64",
     "ISMUSL",
     "PY310",
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index c5792fa1379fe..fa0e9e974ea39 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -37,6 +37,7 @@
     "pyarrow": "7.0.0",
     "pyreadstat": "1.1.5",
     "pytest": "7.3.2",
+    "python-calamine": "0.1.6",
     "pyxlsb": "1.0.9",
     "s3fs": "2022.05.0",
     "scipy": "1.8.1",
@@ -62,6 +63,7 @@
     "lxml.etree": "lxml",
     "odf": "odfpy",
     "pandas_gbq": "pandas-gbq",
+    "python_calamine": "python-calamine",
     "sqlalchemy": "SQLAlchemy",
     "tables": "pytables",
 }
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index 049ce50920e28..12f58be109d98 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -15,6 +15,7 @@
     pa_version_under11p0 = _palv < Version("11.0.0")
     pa_version_under12p0 = _palv < Version("12.0.0")
     pa_version_under13p0 = _palv < Version("13.0.0")
+    pa_version_under14p0 = _palv < Version("14.0.0")
 except ImportError:
     pa_version_under7p0 = True
     pa_version_under8p0 = True
@@ -23,3 +24,4 @@
     pa_version_under11p0 = True
     pa_version_under12p0 = True
     pa_version_under13p0 = True
+    pa_version_under14p0 = True
diff --git a/pandas/conftest.py b/pandas/conftest.py
index d327f8d619f13..b083317c3174b 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -71,6 +71,7 @@
     Index,
     MultiIndex,
 )
+from pandas.util.version import Version
 
 if TYPE_CHECKING:
     from collections.abc import (
@@ -192,6 +193,10 @@ def pytest_collection_modifyitems(items, config) -> None:
             item.add_marker(pytest.mark.arraymanager)
 
 
+hypothesis_health_checks = [hypothesis.HealthCheck.too_slow]
+if Version(hypothesis.__version__) >= Version("6.83.2"):
+    hypothesis_health_checks.append(hypothesis.HealthCheck.differing_executors)
+
 # Hypothesis
 hypothesis.settings.register_profile(
     "ci",
@@ -203,7 +208,7 @@ def pytest_collection_modifyitems(items, config) -> None:
     # 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
     # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969)
     deadline=None,
-    suppress_health_check=(hypothesis.HealthCheck.too_slow,),
+    suppress_health_check=tuple(hypothesis_health_checks),
 )
 hypothesis.settings.load_profile("ci")
 
diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py
index 5cd4779907146..0a26acb7df60a 100644
--- a/pandas/core/_numba/executor.py
+++ b/pandas/core/_numba/executor.py
@@ -15,6 +15,45 @@
 from pandas.compat._optional import import_optional_dependency
 
 
+@functools.cache  # reuse the looper built for an identical set of arguments
+def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
+    if TYPE_CHECKING:
+        import numba
+    else:
+        numba = import_optional_dependency("numba")
+    nb_compat_func = numba.extending.register_jitable(func)
+
+    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+    def nb_looper(values, axis):
+        # Call func on the first column (axis=0) or row (otherwise) up front;
+        # the shape of that result determines the shape of the output buffer
+        if axis == 0:
+            first_elem = values[:, 0]
+            dim0 = values.shape[1]
+        else:
+            first_elem = values[0]
+            dim0 = values.shape[0]
+        res0 = nb_compat_func(first_elem)
+        # np.asarray is used to obtain the shape of res0; see
+        # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
+        buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
+        if axis == 0:
+            buf_shape = buf_shape[::-1]  # results are stored as columns
+        buff = np.empty(buf_shape)
+
+        if axis == 1:
+            buff[0] = res0  # already computed above, so the loop starts at 1
+            for i in numba.prange(1, values.shape[0]):
+                buff[i] = nb_compat_func(values[i])
+        else:
+            buff[:, 0] = res0  # already computed above, so the loop starts at 1
+            for j in numba.prange(1, values.shape[1]):
+                buff[:, j] = nb_compat_func(values[:, j])
+        return buff
+
+    return nb_looper
+
+
 @functools.cache
 def make_looper(func, result_dtype, is_grouped_kernel, nopython, nogil, parallel):
     if TYPE_CHECKING:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 0a9c1aad46f89..1d74bb8b83e4e 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -878,7 +878,9 @@ def value_counts_internal(
     if bins is not None:
         from pandas.core.reshape.tile import cut
 
-        values = Series(values, copy=False)
+        if isinstance(values, Series):
+            values = values._values
+
         try:
             ii = cut(values, bins, include_lowest=True)
         except TypeError as err:
@@ -998,7 +1000,7 @@ def duplicated(
     duplicated : ndarray[bool]
     """
     if hasattr(values, "dtype"):
-        if isinstance(values.dtype, ArrowDtype):
+        if isinstance(values.dtype, ArrowDtype) and values.dtype.kind in "ifub":
             values = values._to_masked()  # type: ignore[union-attr]
 
         if isinstance(values.dtype, BaseMaskedDtype):
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 4d6dd8f4fd577..9748d4fe66739 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -49,6 +49,7 @@
     ABCSeries,
 )
 
+from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
 
@@ -80,6 +81,8 @@ def frame_apply(
     raw: bool = False,
     result_type: str | None = None,
     by_row: Literal[False, "compat"] = "compat",
+    engine: str = "python",
+    engine_kwargs: dict[str, bool] | None = None,
     args=None,
     kwargs=None,
 ) -> FrameApply:
@@ -100,6 +103,8 @@ def frame_apply(
         raw=raw,
         result_type=result_type,
         by_row=by_row,
+        engine=engine,
+        engine_kwargs=engine_kwargs,
         args=args,
         kwargs=kwargs,
     )
@@ -436,7 +441,13 @@ def compute_dict_like(
             Data for result. When aggregating with a Series, this can contain any
             Python object.
         """
+        from pandas.core.groupby.generic import (
+            DataFrameGroupBy,
+            SeriesGroupBy,
+        )
+
         obj = self.obj
+        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
         func = cast(AggFuncTypeDict, self.func)
         func = self.normalize_dictlike_arg(op_name, selected_obj, func)
 
@@ -450,7 +461,7 @@ def compute_dict_like(
             colg = obj._gotitem(selection, ndim=1)
             results = [getattr(colg, op_name)(how, **kwargs) for _, how in func.items()]
             keys = list(func.keys())
-        elif is_non_unique_col:
+        elif not is_groupby and is_non_unique_col:
             # key used for column selection and output
             # GH#51099
             results = []
@@ -750,11 +761,15 @@ def __init__(
         result_type: str | None,
         *,
         by_row: Literal[False, "compat"] = False,
+        engine: str = "python",
+        engine_kwargs: dict[str, bool] | None = None,
         args,
         kwargs,
     ) -> None:
         if by_row is not False and by_row != "compat":
             raise ValueError(f"by_row={by_row} not allowed")
+        self.engine = engine
+        self.engine_kwargs = engine_kwargs
         super().__init__(
             obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs
         )
@@ -799,6 +814,12 @@ def values(self):
 
     def apply(self) -> DataFrame | Series:
         """compute the results"""
+
+        if self.engine == "numba" and not self.raw:
+            raise ValueError(
+                "The numba engine in DataFrame.apply can only be used when raw=True"
+            )
+
         # dispatch to handle list-like or dict-like
         if is_list_like(self.func):
             return self.apply_list_or_dict_like()
@@ -828,7 +849,7 @@ def apply(self) -> DataFrame | Series:
 
         # raw
         elif self.raw:
-            return self.apply_raw()
+            return self.apply_raw(engine=self.engine, engine_kwargs=self.engine_kwargs)
 
         return self.apply_standard()
 
@@ -901,7 +922,7 @@ def apply_empty_result(self):
         else:
             return self.obj.copy()
 
-    def apply_raw(self):
+    def apply_raw(self, engine="python", engine_kwargs=None):
         """apply to the values as a numpy array"""
 
         def wrap_function(func):
@@ -919,7 +940,27 @@ def wrapper(*args, **kwargs):
 
             return wrapper
 
-        result = np.apply_along_axis(wrap_function(self.func), self.axis, self.values)
+        if engine == "numba":
+            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
+
+            # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
+            # incompatible type "Callable[..., Any] | str | list[Callable
+            # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
+            # list[Callable[..., Any] | str]]"; expected "Hashable"
+            nb_looper = generate_apply_looper(
+                self.func, **engine_kwargs  # type: ignore[arg-type]
+            )
+            result = nb_looper(self.values, self.axis)
+            # If we made the result 2-D, squeeze it back to 1-D
+            result = np.squeeze(result)
+        else:
+            result = np.apply_along_axis(
+                wrap_function(self.func),
+                self.axis,
+                self.values,
+                *self.args,
+                **self.kwargs,
+            )
 
         # TODO: mixed type case
         if result.ndim == 2:
@@ -1826,12 +1867,12 @@ def warn_alias_replacement(
         full_alias = alias
     else:
         full_alias = f"{type(obj).__name__}.{alias}"
-        alias = f"'{alias}'"
+        alias = f'"{alias}"'
     warnings.warn(
         f"The provided callable {func} is currently using "
         f"{full_alias}. In a future version of pandas, "
         f"the provided callable will be used directly. To keep current "
-        f"behavior pass {alias} instead.",
+        f"behavior pass the string {alias} instead.",
         category=FutureWarning,
         stacklevel=find_stack_level(),
     )
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e815b8292b0cc..2b2e0c843564f 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -18,6 +18,7 @@
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
+    timezones,
 )
 from pandas.compat import (
     pa_version_under7p0,
@@ -29,12 +30,12 @@
 from pandas.util._decorators import doc
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.cast import can_hold_element
 from pandas.core.dtypes.common import (
     is_array_like,
     is_bool_dtype,
     is_integer,
     is_list_like,
-    is_object_dtype,
     is_scalar,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -1240,46 +1241,50 @@ def to_numpy(
     ) -> np.ndarray:
         if dtype is not None:
             dtype = np.dtype(dtype)
-        elif self._hasna:
-            dtype = np.dtype(object)
 
         if na_value is lib.no_default:
             na_value = self.dtype.na_value
 
         pa_type = self._pa_array.type
+        if not self._hasna or isna(na_value) or pa.types.is_null(pa_type):
+            data = self
+        else:
+            data = self.fillna(na_value)
+            copy = False
+
         if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type):
-            result = self._maybe_convert_datelike_array()
+            result = data._maybe_convert_datelike_array()
             if dtype is None or dtype.kind == "O":
                 result = result.to_numpy(dtype=object, na_value=na_value)
             else:
                 result = result.to_numpy(dtype=dtype)
-            return result
         elif pa.types.is_time(pa_type) or pa.types.is_date(pa_type):
             # convert to list of python datetime.time objects before
             # wrapping in ndarray
-            result = np.array(list(self), dtype=dtype)
-        elif is_object_dtype(dtype) and self._hasna:
-            result = np.empty(len(self), dtype=object)
-            mask = ~self.isna()
-            result[mask] = np.asarray(self[mask]._pa_array)
-        elif pa.types.is_null(self._pa_array.type):
-            fill_value = None if isna(na_value) else na_value
-            return np.full(len(self), fill_value=fill_value, dtype=dtype)
-        elif self._hasna:
-            data = self.fillna(na_value)
+            result = np.array(list(data), dtype=dtype)
+            if data._hasna:
+                result[data.isna()] = na_value
+        elif pa.types.is_null(pa_type):
+            if dtype is not None and isna(na_value):
+                na_value = None
+            result = np.full(len(data), fill_value=na_value, dtype=dtype)
+        elif not data._hasna or (pa.types.is_floating(pa_type) and na_value is np.nan):
             result = data._pa_array.to_numpy()
-            if dtype is not None:
-                result = result.astype(dtype, copy=False)
-            return result
-        else:
-            result = self._pa_array.to_numpy()
             if dtype is not None:
                 result = result.astype(dtype, copy=False)
             if copy:
                 result = result.copy()
-            return result
-        if self._hasna:
-            result[self.isna()] = na_value
+        else:
+            if dtype is None:
+                empty = pa.array([], type=pa_type).to_numpy(zero_copy_only=False)
+                if can_hold_element(empty, na_value):
+                    dtype = empty.dtype
+                else:
+                    dtype = np.object_
+            result = np.empty(len(data), dtype=dtype)
+            mask = data.isna()
+            result[mask] = na_value
+            result[~mask] = data[~mask]._pa_array.to_numpy()
         return result
 
     def unique(self) -> Self:
@@ -2188,11 +2193,11 @@ def _str_rstrip(self, to_strip=None):
         return type(self)(result)
 
     def _str_removeprefix(self, prefix: str):
-        # TODO: Should work once https://github.com/apache/arrow/issues/14991 is fixed
-        # starts_with = pc.starts_with(self._pa_array, pattern=prefix)
-        # removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
-        # result = pc.if_else(starts_with, removed, self._pa_array)
-        # return type(self)(result)
+        if not pa_version_under13p0:
+            starts_with = pc.starts_with(self._pa_array, pattern=prefix)
+            removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
+            result = pc.if_else(starts_with, removed, self._pa_array)
+            return type(self)(result)
         predicate = lambda val: val.removeprefix(prefix)
         result = self._apply_elementwise(predicate)
         return type(self)(pa.chunked_array(result))
@@ -2421,7 +2426,7 @@ def _dt_time(self):
 
     @property
     def _dt_tz(self):
-        return self.dtype.pyarrow_dtype.tz
+        return timezones.maybe_get_tz(self.dtype.pyarrow_dtype.tz)
 
     @property
     def _dt_unit(self):
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 9f63d1f97c54f..8d2633c10b428 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2597,7 +2597,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
             )
         values = sanitize_array(values, None, None)
         null_mask = np.asarray(isna(values))
-        code_values = self.categories.get_indexer(values)
+        code_values = self.categories.get_indexer_for(values)
         code_values = code_values[null_mask | (code_values >= 0)]
         return algorithms.isin(self.codes, code_values)
 
@@ -2948,7 +2948,7 @@ def recode_for_categories(
         return codes
 
     indexer = coerce_indexer_dtype(
-        new_categories.get_indexer(old_categories), new_categories
+        new_categories.get_indexer_for(old_categories), new_categories
     )
     new_codes = take_nd(indexer, codes, fill_value=-1)
     return new_codes
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index ff273e221394a..52596f29ffc0c 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -107,6 +107,7 @@
     algorithms,
     missing,
     nanops,
+    ops,
 )
 from pandas.core.algorithms import (
     checked_add_with_arr,
@@ -947,8 +948,12 @@ def _cmp_method(self, other, op):
 
         dtype = getattr(other, "dtype", None)
         if is_object_dtype(dtype):
-            return op(np.asarray(self, dtype=object), other)
-
+            # We have to use comp_method_OBJECT_ARRAY instead of numpy
+            #  comparison otherwise it would raise when comparing to None
+            result = ops.comp_method_OBJECT_ARRAY(
+                op, np.asarray(self.astype(object)), other
+            )
+            return result
         if other is NaT:
             if op is operator.ne:
                 result = np.ones(self.shape, dtype=bool)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index c90127c0e9812..693ebad0ca16f 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -123,7 +123,8 @@ def __init__(self, storage=None) -> None:
                 storage = get_option("mode.string_storage")
         if storage not in {"python", "pyarrow", "pyarrow_numpy"}:
             raise ValueError(
-                f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
+                f"Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'. "
+                f"Got {storage} instead."
             )
         if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under7p0:
             raise ImportError(
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index f438f75707265..6262055827428 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -15,7 +15,10 @@
     lib,
     missing as libmissing,
 )
-from pandas.compat import pa_version_under7p0
+from pandas.compat import (
+    pa_version_under7p0,
+    pa_version_under13p0,
+)
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
@@ -47,6 +50,8 @@
 
 
 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
     from pandas._typing import (
         Dtype,
         Scalar,
@@ -334,19 +339,13 @@ def _str_startswith(self, pat: str, na=None):
         result = pc.starts_with(self._pa_array, pattern=pat)
         if not isna(na):
             result = result.fill_null(na)
-        result = self._result_converter(result)
-        if not isna(na):
-            result[isna(result)] = bool(na)
-        return result
+        return self._result_converter(result)
 
     def _str_endswith(self, pat: str, na=None):
         result = pc.ends_with(self._pa_array, pattern=pat)
         if not isna(na):
             result = result.fill_null(na)
-        result = self._result_converter(result)
-        if not isna(na):
-            result[isna(result)] = bool(na)
-        return result
+        return self._result_converter(result)
 
     def _str_replace(
         self,
@@ -365,6 +364,12 @@ def _str_replace(
         result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
         return type(self)(result)
 
+    def _str_repeat(self, repeats: int | Sequence[int]):
+        if isinstance(repeats, int):
+            return type(self)(pc.binary_repeat(self._pa_array, repeats))
+        # Non-int repeats are handled by the parent implementation.
+        return super()._str_repeat(repeats)
+
     def _str_match(
         self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
@@ -379,6 +384,19 @@ def _str_fullmatch(
             pat = f"{pat}$"
         return self._str_match(pat, case, flags, na)
 
+    def _str_slice(
+        self, start: int | None = None, stop: int | None = None, step: int | None = None
+    ):
+        if stop is None:
+            # Without an explicit stop, defer to the parent implementation.
+            return super()._str_slice(start, stop, step)
+        begin = 0 if start is None else start
+        stride = 1 if step is None else step
+        sliced = pc.utf8_slice_codeunits(
+            self._pa_array, start=begin, stop=stop, step=stride
+        )
+        return type(self)(sliced)
+
     def _str_isalnum(self):
         result = pc.utf8_is_alnum(self._pa_array)
         return self._result_converter(result)
@@ -417,7 +435,7 @@ def _str_isupper(self):
 
     def _str_len(self):
         result = pc.utf8_length(self._pa_array)
-        return Int64Dtype().__from_arrow__(result)
+        return self._convert_int_dtype(result)
 
     def _str_lower(self):
         return type(self)(pc.utf8_lower(self._pa_array))
@@ -446,10 +464,56 @@ def _str_rstrip(self, to_strip=None):
             result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
         return type(self)(result)
 
+    def _str_removeprefix(self, prefix: str):
+        if pa_version_under13p0:
+            # pa_version_under13p0 is set: defer to the parent implementation.
+            return super()._str_removeprefix(prefix)
+        has_prefix = pc.starts_with(self._pa_array, pattern=prefix)
+        trimmed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
+        return type(self)(pc.if_else(has_prefix, trimmed, self._pa_array))
+
+    def _str_removesuffix(self, suffix: str):
+        arr = self._pa_array
+        # An empty suffix gives stop=-0 == 0, which would blank every string.
+        cut = pc.utf8_slice_codeunits(arr, 0, stop=-len(suffix)) if suffix else arr
+        return type(self)(pc.if_else(pc.ends_with(arr, pattern=suffix), cut, arr))
+
+    def _str_count(self, pat: str, flags: int = 0):
+        if not flags:
+            counts = pc.count_substring_regex(self._pa_array, pat)
+            return self._convert_int_dtype(counts)
+        return super()._str_count(pat, flags)
+
+    def _str_find(self, sub: str, start: int = 0, end: int | None = None):
+        if start > 0 and end is not None:
+            slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
+            result = pc.find_substring(slices, sub)
+            not_found = pc.equal(result, -1)
+            # Hits are relative to the slice; shift them back by ``start``.
+            offset_result = pc.add(result, start)
+            result = pc.if_else(not_found, result, offset_result)
+        elif start == 0 and end is None:
+            result = pc.find_substring(self._pa_array, sub)
+        else:
+            return super()._str_find(sub, start, end)
+        return self._convert_int_dtype(result)
+
+    def _convert_int_dtype(self, result):
+        return Int64Dtype().__from_arrow__(result)
+
 
 class ArrowStringArrayNumpySemantics(ArrowStringArray):
     _storage = "pyarrow_numpy"
 
+    def __init__(self, values) -> None:
+        _chk_pyarrow_available()
+
+        is_arrow = isinstance(values, (pa.Array, pa.ChunkedArray))
+        if is_arrow and pa.types.is_large_string(values.type):
+            # large_string input is cast to plain string before construction
+            values = pc.cast(values, pa.string())
+        super().__init__(values)
+
     @classmethod
     def _result_converter(cls, values, na=None):
         if not isna(na):
@@ -459,7 +523,10 @@ def _result_converter(cls, values, na=None):
     def __getattribute__(self, item):
         # ArrowStringArray and we both inherit from ArrowExtensionArray, which
         # creates inheritance problems (Diamond inheritance)
-        if item in ArrowStringArrayMixin.__dict__ and item != "_pa_array":
+        if item in ArrowStringArrayMixin.__dict__ and item not in (
+            "_pa_array",
+            "__dict__",
+        ):
             return partial(getattr(ArrowStringArrayMixin, item), self)
         return super().__getattribute__(item)
 
@@ -517,34 +584,11 @@ def _str_map(
             return lib.map_infer_mask(arr, f, mask.view("uint8"))
 
     def _convert_int_dtype(self, result):
+        result = result.to_numpy()
         if result.dtype == np.int32:
             result = result.astype(np.int64)
         return result
 
-    def _str_count(self, pat: str, flags: int = 0):
-        if flags:
-            return super()._str_count(pat, flags)
-        result = pc.count_substring_regex(self._pa_array, pat).to_numpy()
-        return self._convert_int_dtype(result)
-
-    def _str_len(self):
-        result = pc.utf8_length(self._pa_array).to_numpy()
-        return self._convert_int_dtype(result)
-
-    def _str_find(self, sub: str, start: int = 0, end: int | None = None):
-        if start != 0 and end is not None:
-            slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
-            result = pc.find_substring(slices, sub)
-            not_found = pc.equal(result, -1)
-            offset_result = pc.add(result, end - start)
-            result = pc.if_else(not_found, result, offset_result)
-        elif start == 0 and end is None:
-            slices = self._pa_array
-            result = pc.find_substring(slices, sub)
-        else:
-            return super()._str_find(sub, start, end)
-        return self._convert_int_dtype(result.to_numpy())
-
     def _cmp_method(self, other, op):
         result = super()._cmp_method(other, op)
         return result.to_numpy(np.bool_, na_value=False)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index d973f8f5fe35a..3026189e747bb 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -485,8 +485,8 @@ def array(self) -> ExtensionArray:
             types, this is the actual array. For NumPy native types, this
             is a thin (no copy) wrapper around :class:`numpy.ndarray`.
 
-            ``.array`` differs ``.values`` which may require converting the
-            data to a different form.
+            ``.array`` differs from ``.values``, which may require converting
+            the data to a different form.
 
         See Also
         --------
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 62455f119a02f..750b374043193 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -513,11 +513,11 @@ def use_inf_as_na_cb(key) -> None:
     auto, {others}.
 """
 
-_xls_options = ["xlrd"]
-_xlsm_options = ["xlrd", "openpyxl"]
-_xlsx_options = ["xlrd", "openpyxl"]
-_ods_options = ["odf"]
-_xlsb_options = ["pyxlsb"]
+_xls_options = ["xlrd", "calamine"]
+_xlsm_options = ["xlrd", "openpyxl", "calamine"]
+_xlsx_options = ["xlrd", "openpyxl", "calamine"]
+_ods_options = ["odf", "calamine"]
+_xlsb_options = ["pyxlsb", "calamine"]
 
 
 with cf.config_prefix("io.excel.xls"):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index f76163cbbd0a1..12de63967c78f 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -70,7 +70,7 @@
     from collections.abc import MutableMapping
     from datetime import tzinfo
 
-    import pyarrow as pa  # noqa: F811, TCH004
+    import pyarrow as pa  # noqa: TCH004
 
     from pandas._typing import (
         Dtype,
@@ -2148,6 +2148,8 @@ def type(self):
             return CategoricalDtypeType
         elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
             return list
+        elif pa.types.is_fixed_size_list(pa_type):
+            return list
         elif pa.types.is_map(pa_type):
             return list
         elif pa.types.is_struct(pa_type):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 227994ab924c1..188df1689ead7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1937,11 +1937,17 @@ def to_dict(
         self,
         orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
         into: type[dict] = ...,
+        index: bool = ...,
     ) -> dict:
         ...
 
     @overload
-    def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]:
+    def to_dict(
+        self,
+        orient: Literal["records"],
+        into: type[dict] = ...,
+        index: bool = ...,
+    ) -> list[dict]:
         ...
 
     @deprecate_nonkeyword_arguments(
@@ -8876,20 +8882,20 @@ def update(
         >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
         ...                               'Parrot', 'Parrot'],
         ...                    'Max Speed': [380., 370., 24., 26.]})
-        >>> df.groupby("Animal", group_keys=True).apply(lambda x: x)
-                  Animal  Max Speed
+        >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
+                  Max Speed
         Animal
-        Falcon 0  Falcon      380.0
-               1  Falcon      370.0
-        Parrot 2  Parrot       24.0
-               3  Parrot       26.0
-
-        >>> df.groupby("Animal", group_keys=False).apply(lambda x: x)
-           Animal  Max Speed
-        0  Falcon      380.0
-        1  Falcon      370.0
-        2  Parrot       24.0
-        3  Parrot       26.0
+        Falcon 0      380.0
+               1      370.0
+        Parrot 2       24.0
+               3       26.0
+
+        >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
+           Max Speed
+        0      380.0
+        1      370.0
+        2       24.0
+        3       26.0
         """
         )
     )
@@ -9932,6 +9938,8 @@ def apply(
         result_type: Literal["expand", "reduce", "broadcast"] | None = None,
         args=(),
         by_row: Literal[False, "compat"] = "compat",
+        engine: Literal["python", "numba"] = "python",
+        engine_kwargs: dict[str, bool] | None = None,
         **kwargs,
     ):
         """
@@ -9991,6 +9999,35 @@ def apply(
             If False, the funcs will be passed the whole Series at once.
 
             .. versionadded:: 2.1.0
+
+        engine : {'python', 'numba'}, default 'python'
+            Choose between the python (default) engine or the numba engine in apply.
+
+            The numba engine will attempt to JIT compile the passed function,
+            which may result in speedups for large DataFrames.
+            It also supports the following engine_kwargs :
+
+            - nopython (compile the function in nopython mode)
+            - nogil (release the GIL inside the JIT compiled function)
+            - parallel (try to apply the function in parallel over the DataFrame)
+
+            Note: The numba compiler only supports a subset of
+            valid Python/numpy operations.
+
+            Please read more about the `supported python features
+            <https://numba.pydata.org/numba-doc/dev/reference/pysupported.html>`_
+            and `supported numpy features
+            <https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html>`_
+            in numba to learn what you can or cannot use in the passed function.
+
+            As of right now, the numba engine can only be used with raw=True.
+
+            .. versionadded:: 2.2.0
+
+        engine_kwargs : dict
+            Pass keyword arguments to the engine.
+            This is currently only used by the numba engine,
+            see the documentation for the engine argument for more information.
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `func`.
@@ -10091,6 +10128,8 @@ def apply(
             raw=raw,
             result_type=result_type,
             by_row=by_row,
+            engine=engine,
+            engine_kwargs=engine_kwargs,
             args=args,
             kwargs=kwargs,
         )
@@ -11310,7 +11349,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
     def any(  # type: ignore[override]
         self,
         *,
-        axis: Axis = 0,
+        axis: Axis | None = 0,
         bool_only: bool = False,
         skipna: bool = True,
         **kwargs,
@@ -11325,7 +11364,7 @@ def any(  # type: ignore[override]
     @doc(make_doc("all", ndim=2))
     def all(
         self,
-        axis: Axis = 0,
+        axis: Axis | None = 0,
         bool_only: bool = False,
         skipna: bool = True,
         **kwargs,
@@ -11724,6 +11763,7 @@ def quantile(
         axis: Axis = ...,
         numeric_only: bool = ...,
         interpolation: QuantileInterpolation = ...,
+        method: Literal["single", "table"] = ...,
     ) -> Series:
         ...
 
@@ -11734,6 +11774,7 @@ def quantile(
         axis: Axis = ...,
         numeric_only: bool = ...,
         interpolation: QuantileInterpolation = ...,
+        method: Literal["single", "table"] = ...,
     ) -> Series | DataFrame:
         ...
 
@@ -11744,6 +11785,7 @@ def quantile(
         axis: Axis = ...,
         numeric_only: bool = ...,
         interpolation: QuantileInterpolation = ...,
+        method: Literal["single", "table"] = ...,
     ) -> Series | DataFrame:
         ...
 
@@ -11843,11 +11885,10 @@ def quantile(
 
         if not is_list_like(q):
             # BlockManager.quantile expects listlike, so we wrap and unwrap here
-            # error: List item 0 has incompatible type "Union[float, Union[Union[
-            # ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]";
-            # expected "float"
-            res_df = self.quantile(  # type: ignore[call-overload]
-                [q],
+            # error: List item 0 has incompatible type "float | ExtensionArray |
+            # ndarray[Any, Any] | Index | Series | Sequence[float]"; expected "float"
+            res_df = self.quantile(
+                [q],  # type: ignore[list-item]
                 axis=axis,
                 numeric_only=numeric_only,
                 interpolation=interpolation,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8e75d27a953ad..0523b25c39602 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2848,7 +2848,7 @@ def to_sql(
 
         index : bool, default True
             Write DataFrame index as a column. Uses `index_label` as the column
-            name in the table.
+            name in the table. Creates a table index for this column.
         index_label : str or sequence, default None
             Column label for index column(s). If None is given (default) and
             `index` is True, then the index names are used.
@@ -5719,10 +5719,12 @@ def filter(
 
         if items is not None:
             name = self._get_axis_name(axis)
+            items = Index(items).intersection(labels)
+            if len(items) == 0:
+                # Keep the dtype of labels when we are empty
+                items = items.astype(labels.dtype)
             # error: Keywords must be strings
-            return self.reindex(  # type: ignore[misc]
-                **{name: labels.intersection(items)}
-            )
+            return self.reindex(**{name: items})  # type: ignore[misc]
         elif like:
 
             def f(x) -> bool_t:
@@ -7939,6 +7941,51 @@ def replace(
         else:
             return result.__finalize__(self, method="replace")
 
+    @overload
+    def interpolate(
+        self,
+        method: InterpolateOptions = ...,
+        *,
+        axis: Axis = ...,
+        limit: int | None = ...,
+        inplace: Literal[False] = ...,
+        limit_direction: Literal["forward", "backward", "both"] | None = ...,
+        limit_area: Literal["inside", "outside"] | None = ...,
+        downcast: Literal["infer"] | None | lib.NoDefault = ...,
+        **kwargs,
+    ) -> Self:
+        ...
+
+    @overload
+    def interpolate(
+        self,
+        method: InterpolateOptions = ...,
+        *,
+        axis: Axis = ...,
+        limit: int | None = ...,
+        inplace: Literal[True],
+        limit_direction: Literal["forward", "backward", "both"] | None = ...,
+        limit_area: Literal["inside", "outside"] | None = ...,
+        downcast: Literal["infer"] | None | lib.NoDefault = ...,
+        **kwargs,
+    ) -> None:
+        ...
+
+    @overload
+    def interpolate(
+        self,
+        method: InterpolateOptions = ...,
+        *,
+        axis: Axis = ...,
+        limit: int | None = ...,
+        inplace: bool_t = ...,
+        limit_direction: Literal["forward", "backward", "both"] | None = ...,
+        limit_area: Literal["inside", "outside"] | None = ...,
+        downcast: Literal["infer"] | None | lib.NoDefault = ...,
+        **kwargs,
+    ) -> Self | None:
+        ...
+
     @final
     def interpolate(
         self,
@@ -8181,10 +8228,11 @@ def interpolate(
                         stacklevel=find_stack_level(),
                     )
 
-        if "fill_value" in kwargs:
+        if method in fillna_methods and "fill_value" in kwargs:
             raise ValueError(
                 "'fill_value' is not a valid keyword for "
-                f"{type(self).__name__}.interpolate"
+                f"{type(self).__name__}.interpolate with method from "
+                f"{fillna_methods}"
             )
 
         if isinstance(obj.index, MultiIndex) and method != "linear":
@@ -8608,6 +8656,42 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
         # GH 40420
         return self.where(subset, threshold, axis=axis, inplace=inplace)
 
+    @overload  # inplace=False (or omitted): the result is typed as Self
+    def clip(
+        self,
+        lower=...,
+        upper=...,
+        *,
+        axis: Axis | None = ...,
+        inplace: Literal[False] = ...,
+        **kwargs,
+    ) -> Self:
+        ...
+
+    @overload  # inplace=True must be passed explicitly: the result is typed as None
+    def clip(
+        self,
+        lower=...,
+        upper=...,
+        *,
+        axis: Axis | None = ...,
+        inplace: Literal[True],
+        **kwargs,
+    ) -> None:
+        ...
+
+    @overload  # fallback for a non-literal bool inplace: typed as Self | None
+    def clip(
+        self,
+        lower=...,
+        upper=...,
+        *,
+        axis: Axis | None = ...,
+        inplace: bool_t = ...,
+        **kwargs,
+    ) -> Self | None:
+        ...
+
     @final
     def clip(
         self,
@@ -11710,15 +11794,21 @@ def pct_change(
                 stacklevel=find_stack_level(),
             )
         if fill_method is lib.no_default:
-            if self.isna().values.any():
-                warnings.warn(
-                    "The default fill_method='pad' in "
-                    f"{type(self).__name__}.pct_change is deprecated and will be "
-                    "removed in a future version. Call ffill before calling "
-                    "pct_change to retain current behavior and silence this warning.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
+            cols = self.items() if self.ndim == 2 else [(None, self)]
+            for _, col in cols:
+                mask = col.isna().values
+                mask = mask[np.argmax(~mask) :]
+                if mask.any():
+                    warnings.warn(
+                        "The default fill_method='pad' in "
+                        f"{type(self).__name__}.pct_change is deprecated and will be "
+                        "removed in a future version. Call ffill before calling "
+                        "pct_change to retain current behavior and silence this "
+                        "warning.",
+                        FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
+                    break
             fill_method = "pad"
         if limit is lib.no_default:
             limit = None
@@ -11744,7 +11834,7 @@ def _logical_func(
         self,
         name: str,
         func,
-        axis: Axis = 0,
+        axis: Axis | None = 0,
         bool_only: bool_t = False,
         skipna: bool_t = True,
         **kwargs,
@@ -11757,7 +11847,10 @@ def _logical_func(
             res = self._logical_func(
                 name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
             )
-            return res._logical_func(name, func, skipna=skipna, **kwargs)
+            # error: Item "bool" of "Series | bool" has no attribute "_logical_func"
+            return res._logical_func(  # type: ignore[union-attr]
+                name, func, skipna=skipna, **kwargs
+            )
         elif axis is None:
             axis = 0
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 11c97e30ab5cd..e6dd6a990d285 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -180,6 +180,19 @@ class providing the base-class of operations.
         A callable that takes a {input} as its first argument, and
         returns a dataframe, a series or a scalar. In addition the
         callable may take positional and keyword arguments.
+    include_groups : bool, default True
+        When True, will attempt to apply ``func`` to the groupings in
+        the case that they are columns of the DataFrame. If this raises a
+        TypeError, the result will be computed with the groupings excluded.
+        When False, the groupings will be excluded when applying ``func``.
+
+        .. versionadded:: 2.2.0
+
+        .. deprecated:: 2.2.0
+
+           Setting include_groups to True is deprecated. Only the value
+           False will be allowed in a future version of pandas.
+
     args, kwargs : tuple and dict
         Optional positional and keyword arguments to pass to ``func``.
 
@@ -272,7 +285,7 @@ class providing the base-class of operations.
     each group together into a Series, including setting the index as
     appropriate:
 
-    >>> g1.apply(lambda x: x.C.max() - x.B.min())
+    >>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
     A
     a    5
     b    2
@@ -1080,7 +1093,8 @@ def get_group(self, name, obj=None) -> DataFrame | Series:
             raise KeyError(name)
 
         if obj is None:
-            return self._selected_obj.iloc[inds]
+            indexer = inds if self.axis == 0 else (slice(None), inds)
+            return self._selected_obj.iloc[indexer]
         else:
             warnings.warn(
                 "obj is deprecated and will be removed in a future version. "
@@ -1747,7 +1761,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
             input="dataframe", examples=_apply_docs["dataframe_examples"]
         )
     )
-    def apply(self, func, *args, **kwargs) -> NDFrameT:
+    def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
         orig_func = func
         func = com.is_builtin_func(func)
         if orig_func != func:
@@ -1780,10 +1794,25 @@ def f(g):
         else:
             f = func
 
+        if not include_groups:
+            return self._python_apply_general(f, self._obj_with_exclusions)
+
         # ignore SettingWithCopy here in case the user mutates
         with option_context("mode.chained_assignment", None):
             try:
                 result = self._python_apply_general(f, self._selected_obj)
+                if (
+                    not isinstance(self.obj, Series)
+                    and self._selection is None
+                    and self._selected_obj.shape != self._obj_with_exclusions.shape
+                ):
+                    warnings.warn(
+                        message=_apply_groupings_depr.format(
+                            type(self).__name__, "apply"
+                        ),
+                        category=FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
             except TypeError:
                 # gh-20949
                 # try again, with .apply acting as a filtering
@@ -3519,7 +3548,7 @@ def describe(
         return result
 
     @final
-    def resample(self, rule, *args, **kwargs) -> Resampler:
+    def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
         """
         Provide resampling when using a TimeGrouper.
 
@@ -3533,7 +3562,23 @@ def resample(self, rule, *args, **kwargs) -> Resampler:
         ----------
         rule : str or DateOffset
             The offset string or object representing target grouper conversion.
-        *args, **kwargs
+        *args
+            Possible arguments are `how`, `fill_method`, `limit`, `kind` and
+            `on`, and other arguments of `TimeGrouper`.
+        include_groups : bool, default True
+            When True, will attempt to include the groupings in the operation in
+            the case that they are columns of the DataFrame. If this raises a
+            TypeError, the result will be computed with the groupings excluded.
+            When False, the groupings will be excluded from the operation.
+
+            .. versionadded:: 2.2.0
+
+            .. deprecated:: 2.2.0
+
+               Setting include_groups to True is deprecated. Only the value
+               False will be allowed in a future version of pandas.
+
+        **kwargs
             Possible arguments are `how`, `fill_method`, `limit`, `kind` and
             `on`, and other arguments of `TimeGrouper`.
 
@@ -3569,59 +3614,71 @@ def resample(self, rule, *args, **kwargs) -> Resampler:
         Downsample the DataFrame into 3 minute bins and sum the values of
         the timestamps falling into a bin.
 
-        >>> df.groupby('a').resample('3min').sum()
-                                 a  b
+        >>> df.groupby('a').resample('3min', include_groups=False).sum()
+                                 b
         a
-        0   2000-01-01 00:00:00  0  2
-            2000-01-01 00:03:00  0  1
-        5   2000-01-01 00:00:00  5  1
+        0   2000-01-01 00:00:00  2
+            2000-01-01 00:03:00  1
+        5   2000-01-01 00:00:00  1
 
         Upsample the series into 30 second bins.
 
-        >>> df.groupby('a').resample('30s').sum()
-                            a  b
+        >>> df.groupby('a').resample('30s', include_groups=False).sum()
+                            b
         a
-        0   2000-01-01 00:00:00  0  1
-            2000-01-01 00:00:30  0  0
-            2000-01-01 00:01:00  0  1
-            2000-01-01 00:01:30  0  0
-            2000-01-01 00:02:00  0  0
-            2000-01-01 00:02:30  0  0
-            2000-01-01 00:03:00  0  1
-        5   2000-01-01 00:02:00  5  1
+        0   2000-01-01 00:00:00  1
+            2000-01-01 00:00:30  0
+            2000-01-01 00:01:00  1
+            2000-01-01 00:01:30  0
+            2000-01-01 00:02:00  0
+            2000-01-01 00:02:30  0
+            2000-01-01 00:03:00  1
+        5   2000-01-01 00:02:00  1
 
         Resample by month. Values are assigned to the month of the period.
 
-        >>> df.groupby('a').resample('M').sum()
-                    a  b
+        >>> df.groupby('a').resample('M', include_groups=False).sum()
+                    b
         a
-        0   2000-01-31  0  3
-        5   2000-01-31  5  1
+        0   2000-01-31  3
+        5   2000-01-31  1
 
         Downsample the series into 3 minute bins as above, but close the right
         side of the bin interval.
 
-        >>> df.groupby('a').resample('3min', closed='right').sum()
-                                 a  b
+        >>> (
+        ...     df.groupby('a')
+        ...     .resample('3min', closed='right', include_groups=False)
+        ...     .sum()
+        ... )
+                                 b
         a
-        0   1999-12-31 23:57:00  0  1
-            2000-01-01 00:00:00  0  2
-        5   2000-01-01 00:00:00  5  1
+        0   1999-12-31 23:57:00  1
+            2000-01-01 00:00:00  2
+        5   2000-01-01 00:00:00  1
 
         Downsample the series into 3 minute bins and close the right side of
         the bin interval, but label each bin using the right edge instead of
         the left.
 
-        >>> df.groupby('a').resample('3min', closed='right', label='right').sum()
-                                 a  b
+        >>> (
+        ...     df.groupby('a')
+        ...     .resample('3min', closed='right', label='right', include_groups=False)
+        ...     .sum()
+        ... )
+                                 b
         a
-        0   2000-01-01 00:00:00  0  1
-            2000-01-01 00:03:00  0  2
-        5   2000-01-01 00:03:00  5  1
+        0   2000-01-01 00:00:00  1
+            2000-01-01 00:03:00  2
+        5   2000-01-01 00:03:00  1
         """
         from pandas.core.resample import get_resampler_for_grouping
 
-        return get_resampler_for_grouping(self, rule, *args, **kwargs)
+        # mypy flags that include_groups could be specified via `*args` or `**kwargs`
+        # GH#54961 would resolve.
+        return get_resampler_for_grouping(  # type: ignore[misc]
+            self, rule, *args, include_groups=include_groups, **kwargs
+        )
 
     @final
     def rolling(self, *args, **kwargs) -> RollingGroupby:
@@ -5727,3 +5784,13 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
         mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])
 
     return mi
+
+
+# GH#7155 - FutureWarning template; format with (class name, method name)
+_apply_groupings_depr = (
+    "{}.{} operated on the grouping columns. This behavior is deprecated, "
+    "and in a future version of pandas the grouping columns will be excluded "
+    "from the operation. Either pass `include_groups=False` to exclude the "
+    "groupings or explicitly select the grouping columns after groupby to silence "
+    "this warning."
+)
diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py
index 694a420ad2494..c13ec51ff3851 100644
--- a/pandas/core/indexers/objects.py
+++ b/pandas/core/indexers/objects.py
@@ -262,7 +262,9 @@ def get_window_bounds(
             # end bound is previous end
             # or current index
             end_diff = (self.index[end[i - 1]] - end_bound) * index_growth_sign
-            if end_diff <= zero:
+            if end_diff == zero and not right_closed:
+                end[i] = end[i - 1] + 1
+            elif end_diff <= zero:
                 end[i] = i + 1
             else:
                 end[i] = end[i - 1]
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 781dfae7fef64..877b8edb32520 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -239,8 +239,12 @@ def _unique_indices(inds, dtype) -> Index:
         Index
         """
         if all(isinstance(ind, Index) for ind in inds):
-            result = inds[0].append(inds[1:]).unique()
-            result = result.astype(dtype, copy=False)
+            inds = [ind.astype(dtype, copy=False) for ind in inds]
+            result = inds[0].unique()
+            other = inds[1].append(inds[2:])
+            diff = other[result.get_indexer_for(other) == -1]
+            if len(diff):
+                result = result.append(diff.unique())
             if sort:
                 result = result.sort_values()
             return result
@@ -288,7 +292,6 @@ def _find_common_index_dtype(inds):
             raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
 
         if len(dtis) == len(indexes):
-            sort = True
             result = indexes[0]
 
         elif len(dtis) > 1:
@@ -377,5 +380,5 @@ def all_indexes_same(indexes) -> bool:
 
 
 def default_index(n: int) -> RangeIndex:
-    rng = range(0, n)
+    rng = range(n)
     return RangeIndex._simple_new(rng, name=None)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 6a397862712de..8756bb3f3c81b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3615,21 +3615,10 @@ def difference(self, other, sort=None):
 
     def _difference(self, other, sort):
         # overridden by RangeIndex
-
-        this = self.unique()
-
-        indexer = this.get_indexer_for(other)
-        indexer = indexer.take((indexer != -1).nonzero()[0])
-
-        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
-
-        the_diff: MultiIndex | ArrayLike
-        if isinstance(this, ABCMultiIndex):
-            the_diff = this.take(label_diff)
-        else:
-            the_diff = this._values.take(label_diff)
+        other = other.unique()
+        the_diff = self[other.get_indexer_for(self) == -1]
+        the_diff = the_diff if self.is_unique else the_diff.unique()
         the_diff = _maybe_try_sort(the_diff, sort)
-
         return the_diff
 
     def _wrap_difference_result(self, other, result):
@@ -4557,7 +4546,7 @@ def join(
         -------
         join_index, (left_indexer, right_indexer)
 
-         Examples
+        Examples
         --------
         >>> idx1 = pd.Index([1, 2, 3])
         >>> idx2 = pd.Index([4, 5, 6])
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index dcb5f8caccd3e..400747cbf6b8d 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -198,8 +198,6 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
     timetz
     dayofyear
     day_of_year
-    weekofyear
-    week
     dayofweek
     day_of_week
     weekday
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 4d33f0137d3c4..b2d463a8c6c26 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -177,7 +177,7 @@ def concatenate_managers(
                 values = np.concatenate(vals, axis=1)  # type: ignore[arg-type]
             elif is_1d_only_ea_dtype(blk.dtype):
                 # TODO(EA2D): special-casing not needed with 2D EAs
-                values = concat_compat(vals, axis=1, ea_compat_axis=True)
+                values = concat_compat(vals, axis=0, ea_compat_axis=True)
                 values = ensure_block_shape(values, ndim=2)
             else:
                 values = concat_compat(vals, axis=1)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 4cb7b610074ba..b1db2d2e708e8 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -969,6 +969,10 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
         n = len(self)
 
         if isinstance(dtype, ExtensionDtype):
+            # TODO: use object dtype as workaround for non-performant
+            #  EA.__setitem__ methods. (primarily ArrowExtensionArray.__setitem__
+            #  when iteratively setting individual values)
+            #  https://github.com/pandas-dev/pandas/pull/54508#issuecomment-1675827918
             result = np.empty(n, dtype=object)
         else:
             result = np.empty(n, dtype=dtype)
diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py
index e89f641e17296..f4e0dcddcd34a 100644
--- a/pandas/core/methods/to_dict.py
+++ b/pandas/core/methods/to_dict.py
@@ -106,13 +106,13 @@ def to_dict(
         return into_c((k, v.to_dict(into)) for k, v in df.items())
 
     elif orient == "list":
-        object_dtype_indices_as_set = set(box_native_indices)
+        object_dtype_indices_as_set: set[int] = set(box_native_indices)
         return into_c(
             (
                 k,
-                list(map(maybe_box_native, v.tolist()))
+                list(map(maybe_box_native, v.to_numpy().tolist()))
                 if i in object_dtype_indices_as_set
-                else v.tolist(),
+                else v.to_numpy().tolist(),
             )
             for i, (k, v) in enumerate(df.items())
         )
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 5ff18d8a25e36..9605bf154a8b7 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -32,7 +32,10 @@
     Substitution,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
+from pandas.util._exceptions import (
+    find_stack_level,
+    rewrite_warning,
+)
 
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
@@ -57,6 +60,7 @@
 from pandas.core.groupby.groupby import (
     BaseGroupBy,
     GroupBy,
+    _apply_groupings_depr,
     _pipe_template,
     get_groupby,
 )
@@ -163,6 +167,7 @@ def __init__(
         gpr_index: Index,
         group_keys: bool = False,
         selection=None,
+        include_groups: bool = True,
     ) -> None:
         self._timegrouper = timegrouper
         self.keys = None
@@ -171,6 +176,7 @@ def __init__(
         self.kind = kind
         self.group_keys = group_keys
         self.as_index = True
+        self.include_groups = include_groups
 
         self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
             self._convert_obj(obj), sort=True, gpr_index=gpr_index
@@ -444,7 +450,9 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
             #  a DataFrame column, but aggregate_item_by_item operates column-wise
             #  on Series, raising AttributeError or KeyError
             #  (depending on whether the column lookup uses getattr/__getitem__)
-            result = grouped.apply(how, *args, **kwargs)
+            result = _apply(
+                grouped, how, *args, include_groups=self.include_groups, **kwargs
+            )
 
         except ValueError as err:
             if "Must produce aggregated value" in str(err):
@@ -456,15 +464,21 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
 
             # we have a non-reducing function
             # try to evaluate
-            result = grouped.apply(how, *args, **kwargs)
+            result = _apply(
+                grouped, how, *args, include_groups=self.include_groups, **kwargs
+            )
 
         return self._wrap_result(result)
 
-    def _get_resampler_for_grouping(self, groupby: GroupBy, key):
+    def _get_resampler_for_grouping(
+        self, groupby: GroupBy, key, include_groups: bool = True
+    ):
         """
         Return the correct class for resampling with groupby.
         """
-        return self._resampler_for_grouping(groupby=groupby, key=key, parent=self)
+        return self._resampler_for_grouping(
+            groupby=groupby, key=key, parent=self, include_groups=include_groups
+        )
 
     def _wrap_result(self, result):
         """
@@ -1590,6 +1604,7 @@ def __init__(
         groupby: GroupBy,
         key=None,
         selection: IndexLabel | None = None,
+        include_groups: bool = False,
     ) -> None:
         # reached via ._gotitem and _get_resampler_for_grouping
 
@@ -1612,6 +1627,7 @@ def __init__(
 
         self.ax = parent.ax
         self.obj = parent.obj
+        self.include_groups = include_groups
 
     @no_type_check
     def _apply(self, f, *args, **kwargs):
@@ -1628,7 +1644,7 @@ def func(x):
 
             return x.apply(f, *args, **kwargs)
 
-        result = self._groupby.apply(func)
+        result = _apply(self._groupby, func, include_groups=self.include_groups)
         return self._wrap_result(result)
 
     _upsample = _apply
@@ -2003,6 +2019,7 @@ def get_resampler_for_grouping(
     limit: int | None = None,
     kind=None,
     on=None,
+    include_groups: bool = True,
     **kwargs,
 ) -> Resampler:
     """
@@ -2011,7 +2028,9 @@ def get_resampler_for_grouping(
     # .resample uses 'on' similar to how .groupby uses 'key'
     tg = TimeGrouper(freq=rule, key=on, **kwargs)
     resampler = tg._get_resampler(groupby.obj, kind=kind)
-    return resampler._get_resampler_for_grouping(groupby=groupby, key=tg.key)
+    return resampler._get_resampler_for_grouping(
+        groupby=groupby, include_groups=include_groups, key=tg.key
+    )
 
 
 class TimeGrouper(Grouper):
@@ -2789,3 +2808,18 @@ def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None:
         category=FutureWarning,
         stacklevel=find_stack_level(),
     )
+
+
+def _apply(
+    grouped: GroupBy, how: Callable, *args, include_groups: bool, **kwargs
+) -> DataFrame:
+    """Run ``grouped.apply`` so its GH#7155 warning names ``resample`` instead."""
+    # rewrite_warning is handed the text emitted by groupby.apply together with
+    # the same deprecation template formatted for "resample" as its replacement.
+    apply_msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with rewrite_warning(
+        target_message=apply_msg,
+        target_category=FutureWarning,
+        new_message=_apply_groupings_depr.format("DataFrameGroupBy", "resample"),
+    ):
+        return grouped.apply(how, *args, include_groups=include_groups, **kwargs)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index f3695fb87ea78..6d1ff07e07c76 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1272,12 +1272,7 @@ def _get_merge_keys(
                             # work-around for merge_asof(right_index=True)
                             right_keys.append(right.index._values)
                         if lk is not None and lk == rk:  # FIXME: what about other NAs?
-                            # avoid key upcast in corner case (length-0)
-                            lk = cast(Hashable, lk)
-                            if len(left) > 0:
-                                right_drop.append(rk)
-                            else:
-                                left_drop.append(lk)
+                            right_drop.append(rk)
                     else:
                         rk = cast(ArrayLike, rk)
                         right_keys.append(rk)
@@ -2421,7 +2416,8 @@ def _factorize_keys(
 
     elif isinstance(lk, ExtensionArray) and lk.dtype == rk.dtype:
         if (isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype)) or (
-            isinstance(lk.dtype, StringDtype) and lk.dtype.storage == "pyarrow"
+            isinstance(lk.dtype, StringDtype)
+            and lk.dtype.storage in ["pyarrow", "pyarrow_numpy"]
         ):
             import pyarrow as pa
             import pyarrow.compute as pc
@@ -2437,8 +2433,12 @@ def _factorize_keys(
             length = len(dc.dictionary)
 
             llab, rlab, count = (
-                pc.fill_null(dc.indices[slice(len_lk)], length).to_numpy(),
-                pc.fill_null(dc.indices[slice(len_lk, None)], length).to_numpy(),
+                pc.fill_null(dc.indices[slice(len_lk)], length)
+                .to_numpy()
+                .astype(np.intp, copy=False),
+                pc.fill_null(dc.indices[slice(len_lk, None)], length)
+                .to_numpy()
+                .astype(np.intp, copy=False),
                 len(dc.dictionary),
             )
             if how == "right":
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 924b56f7a14d5..79354fdd12a2d 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -7,6 +7,7 @@
 from typing import (
     TYPE_CHECKING,
     Callable,
+    Literal,
     cast,
 )
 
@@ -449,7 +450,7 @@ def _all_key():
             return (margins_name,) + ("",) * (len(cols) - 1)
 
         if len(rows) > 0:
-            margin = data[rows].groupby(rows, observed=observed).apply(aggfunc)
+            margin = data.groupby(rows, observed=observed)[rows].apply(aggfunc)
             all_key = _all_key()
             table[all_key] = margin
             result = table
@@ -467,7 +468,7 @@ def _all_key():
         margin_keys = table.columns
 
     if len(cols):
-        row_margin = data[cols].groupby(cols, observed=observed).apply(aggfunc)
+        row_margin = data.groupby(cols, observed=observed)[cols].apply(aggfunc)
     else:
         row_margin = Series(np.nan, index=result.columns)
 
@@ -569,7 +570,7 @@ def crosstab(
     margins: bool = False,
     margins_name: Hashable = "All",
     dropna: bool = True,
-    normalize: bool = False,
+    normalize: bool | Literal[0, 1, "all", "index", "columns"] = False,
 ) -> DataFrame:
     """
     Compute a simple cross tabulation of two (or more) factors.
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index fc8d827cd31bb..bf7c7a1ee4dc7 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -908,7 +908,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
             data = frame.copy()
         else:
             # Take the data from frame corresponding to this idx value
-            if not isinstance(idx, tuple):
+            if len(level) == 1:
                 idx = (idx,)
             gen = iter(idx)
             column_indexer = tuple(
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index 43eea7c669ce7..126f589f5df71 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -43,7 +43,6 @@
     to_datetime,
     to_timedelta,
 )
-from pandas.core import nanops
 import pandas.core.algorithms as algos
 
 if TYPE_CHECKING:
@@ -243,43 +242,18 @@ def cut(
     # NOTE: this binning code is changed a bit from histogram for var(x) == 0
 
     original = x
-    x = _preprocess_for_cut(x)
-    x, dtype = _coerce_to_type(x)
+    x_idx = _preprocess_for_cut(x)
+    x_idx, dtype = _coerce_to_type(x_idx)
 
     if not np.iterable(bins):
-        if is_scalar(bins) and bins < 1:
-            raise ValueError("`bins` should be a positive integer.")
-
-        sz = x.size
-
-        if sz == 0:
-            raise ValueError("Cannot cut empty array")
-
-        rng = (nanops.nanmin(x), nanops.nanmax(x))
-        mn, mx = (mi + 0.0 for mi in rng)
-
-        if np.isinf(mn) or np.isinf(mx):
-            # GH 24314
-            raise ValueError(
-                "cannot specify integer `bins` when input data contains infinity"
-            )
-        if mn == mx:  # adjust end points before binning
-            mn -= 0.001 * abs(mn) if mn != 0 else 0.001
-            mx += 0.001 * abs(mx) if mx != 0 else 0.001
-            bins = np.linspace(mn, mx, bins + 1, endpoint=True)
-        else:  # adjust end points after binning
-            bins = np.linspace(mn, mx, bins + 1, endpoint=True)
-            adj = (mx - mn) * 0.001  # 0.1% of the range
-            if right:
-                bins[0] -= adj
-            else:
-                bins[-1] += adj
+        bins = _nbins_to_bins(x_idx, bins, right)
 
     elif isinstance(bins, IntervalIndex):
         if bins.is_overlapping:
             raise ValueError("Overlapping IntervalIndex is not accepted.")
 
     else:
+        bins = Index(bins)
         if isinstance(getattr(bins, "dtype", None), DatetimeTZDtype):
             bins = np.asarray(bins, dtype=DT64NS_DTYPE)
         else:
@@ -289,9 +263,10 @@ def cut(
         # GH 26045: cast to float64 to avoid an overflow
         if (np.diff(bins.astype("float64")) < 0).any():
             raise ValueError("bins must increase monotonically.")
+        bins = Index(bins)
 
     fac, bins = _bins_to_cuts(
-        x,
+        x_idx,
         bins,
         right=right,
         labels=labels,
@@ -367,18 +342,18 @@ def qcut(
     array([0, 0, 1, 2, 3])
     """
     original = x
-    x = _preprocess_for_cut(x)
-    x, dtype = _coerce_to_type(x)
+    x_idx = _preprocess_for_cut(x)
+    x_idx, dtype = _coerce_to_type(x_idx)
 
     quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q
 
-    x_np = np.asarray(x)
+    x_np = np.asarray(x_idx)
     x_np = x_np[~np.isnan(x_np)]
     bins = np.quantile(x_np, quantiles)
 
     fac, bins = _bins_to_cuts(
-        x,
-        bins,
+        x_idx,
+        Index(bins),
         labels=labels,
         precision=precision,
         include_lowest=True,
@@ -389,9 +364,44 @@ def qcut(
     return _postprocess_for_cut(fac, bins, retbins, dtype, original)
 
 
+def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index:
+    """
+    Convert an integer bin count into ``nbins + 1`` roughly evenly spaced bin
+    edges running from the minimum to the maximum of ``x_idx``.
+    """
+    if is_scalar(nbins) and nbins < 1:
+        raise ValueError("`bins` should be a positive integer.")
+    if x_idx.size == 0:
+        raise ValueError("Cannot cut empty array")
+
+    lo, hi = x_idx.min(), x_idx.max()
+    if np.isinf(lo) or np.isinf(hi):
+        # GH#24314
+        raise ValueError(
+            "cannot specify integer `bins` when input data contains infinity"
+        )
+
+    if lo != hi:
+        # Distinct extremes: build the edges first, then move one outer edge by
+        # 0.1% of the range (first edge down if `right`, else last edge up).
+        edges = np.linspace(lo, hi, nbins + 1, endpoint=True)
+        pad = (hi - lo) * 0.001
+        if right:
+            edges[0] -= pad
+        else:
+            edges[-1] += pad
+        return Index(edges)
+
+    # Identical extremes: widen both ends by 0.1% of their magnitude (or by
+    # 0.001 when the value is zero) before building the edges.
+    lo = lo - (0.001 * abs(lo) if lo != 0 else 0.001)
+    hi = hi + (0.001 * abs(hi) if hi != 0 else 0.001)
+    return Index(np.linspace(lo, hi, nbins + 1, endpoint=True))
+
+
 def _bins_to_cuts(
-    x,
-    bins: np.ndarray,
+    x: Index,
+    bins: Index,
     right: bool = True,
     labels=None,
     precision: int = 3,
@@ -408,6 +418,8 @@ def _bins_to_cuts(
             "invalid value for 'duplicates' parameter, valid options are: raise, drop"
         )
 
+    result: Categorical | np.ndarray
+
     if isinstance(bins, IntervalIndex):
         # we have a fast-path here
         ids = bins.get_indexer(x)
@@ -474,7 +486,7 @@ def _bins_to_cuts(
     return result, bins
 
 
-def _coerce_to_type(x):
+def _coerce_to_type(x: Index) -> tuple[Index, DtypeObj | None]:
     """
     if the passed data is of datetime/timedelta, bool or nullable int type,
     this method converts it to numeric so that cut or qcut method can
@@ -498,11 +510,13 @@ def _coerce_to_type(x):
     # https://github.com/pandas-dev/pandas/pull/31290
     # https://github.com/pandas-dev/pandas/issues/31389
     elif isinstance(x.dtype, ExtensionDtype) and is_numeric_dtype(x.dtype):
-        x = x.to_numpy(dtype=np.float64, na_value=np.nan)
+        x_arr = x.to_numpy(dtype=np.float64, na_value=np.nan)
+        x = Index(x_arr)
 
     if dtype is not None:
         # GH 19768: force NaT to NaN during integer conversion
-        x = np.where(x.notna(), x.view(np.int64), np.nan)
+        x_arr = np.where(x.notna(), x.view(np.int64), np.nan)
+        x = Index(x_arr)
 
     return x, dtype
 
@@ -564,7 +578,7 @@ def _convert_bin_to_datelike_type(bins, dtype: DtypeObj | None):
 
 
 def _format_labels(
-    bins,
+    bins: Index,
     precision: int,
     right: bool = True,
     include_lowest: bool = False,
@@ -597,7 +611,7 @@ def _format_labels(
     return IntervalIndex.from_breaks(breaks, closed=closed)
 
 
-def _preprocess_for_cut(x):
+def _preprocess_for_cut(x) -> Index:
     """
     handles preprocessing for cut where we convert passed
     input to array, strip the index information and store it
@@ -611,7 +625,7 @@ def _preprocess_for_cut(x):
     if x.ndim != 1:
         raise ValueError("Input array must be 1 dimensional")
 
-    return x
+    return Index(x)
 
 
 def _postprocess_for_cut(fac, bins, retbins: bool, dtype: DtypeObj | None, original):
@@ -627,6 +641,8 @@ def _postprocess_for_cut(fac, bins, retbins: bool, dtype: DtypeObj | None, origi
         return fac
 
     bins = _convert_bin_to_datelike_type(bins, dtype)
+    if isinstance(bins, Index) and is_numeric_dtype(bins.dtype):
+        bins = bins._values
 
     return fac, bins
 
@@ -646,7 +662,7 @@ def _round_frac(x, precision: int):
         return np.around(x, digits)
 
 
-def _infer_precision(base_precision: int, bins) -> int:
+def _infer_precision(base_precision: int, bins: Index) -> int:
     """
     Infer an appropriate precision for _round_frac
     """
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 915595833468d..3b2ae5daffdba 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -28,4 +28,7 @@ def _arrow_dtype_mapping() -> dict:
 def arrow_string_types_mapper() -> Callable:
     pa = import_optional_dependency("pyarrow")
 
-    return {pa.string(): pd.StringDtype(storage="pyarrow_numpy")}.get
+    return {
+        pa.string(): pd.StringDtype(storage="pyarrow_numpy"),
+        pa.large_string(): pd.StringDtype(storage="pyarrow_numpy"),
+    }.get
diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py
index 806d42381afc6..6491849925e86 100644
--- a/pandas/io/clipboard/__init__.py
+++ b/pandas/io/clipboard/__init__.py
@@ -17,9 +17,12 @@
 On Windows, no additional modules are needed.
 On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli
     commands. (These commands should come with OS X.).
-On Linux, install xclip or xsel via package manager. For example, in Debian:
+On Linux, install xclip, xsel, or wl-clipboard (for "wayland" sessions) via
+package manager.
+For example, in Debian:
     sudo apt-get install xclip
     sudo apt-get install xsel
+    sudo apt-get install wl-clipboard
 
 Otherwise on Linux, you will need the PyQt5 modules installed.
 
@@ -28,12 +31,11 @@
 Cygwin is currently not supported.
 
 Security Note: This module runs programs with these names:
-    - which
-    - where
     - pbcopy
     - pbpaste
     - xclip
     - xsel
+    - wl-copy/wl-paste
     - klipper
     - qdbus
 A malicious user could rename or add programs with these names, tricking
@@ -41,7 +43,7 @@
 
 """
 
-__version__ = "1.7.0"
+__version__ = "1.8.2"
 
 
 import contextlib
@@ -55,7 +57,7 @@
 )
 import os
 import platform
-from shutil import which
+from shutil import which as _executable_exists
 import subprocess
 import time
 import warnings
@@ -74,25 +76,14 @@
 EXCEPT_MSG = """
     Pyperclip could not find a copy/paste mechanism for your system.
     For more information, please visit
-    https://pyperclip.readthedocs.io/en/latest/#not-implemented-error
+    https://pyperclip.readthedocs.io/en/latest/index.html#not-implemented-error
     """
 
 ENCODING = "utf-8"
 
-# The "which" unix command finds where a command is.
-if platform.system() == "Windows":
-    WHICH_CMD = "where"
-else:
-    WHICH_CMD = "which"
 
-
-def _executable_exists(name):
-    return (
-        subprocess.call(
-            [WHICH_CMD, name], stdout=subprocess.PIPE, stderr=subprocess.PIPE
-        )
-        == 0
-    )
+class PyperclipTimeoutException(PyperclipException):
+    pass  # raised by waitForPaste()/waitForNewPaste() when the timeout elapses
 
 
 def _stringifyText(text) -> str:
@@ -229,6 +220,32 @@ def paste_xsel(primary=False):
     return copy_xsel, paste_xsel
 
 
+def init_wl_clipboard():
+    PRIMARY_SELECTION = "-p"  # flag appended to wl-copy/wl-paste when primary=True
+
+    def copy_wl(text, primary=False):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        args = ["wl-copy"]
+        if primary:
+            args.append(PRIMARY_SELECTION)
+        if not text:
+            args.append("--clear")  # empty text: clear instead of piping nothing
+            subprocess.check_call(args, close_fds=True)
+        else:
+            p = subprocess.Popen(args, stdin=subprocess.PIPE, close_fds=True)
+            p.communicate(input=text.encode(ENCODING))  # exit status is not checked
+
+    def paste_wl(primary=False):
+        args = ["wl-paste", "-n"]  # -n: no trailing newline appended (wl-paste(1))
+        if primary:
+            args.append(PRIMARY_SELECTION)
+        p = subprocess.Popen(args, stdout=subprocess.PIPE, close_fds=True)
+        stdout, _stderr = p.communicate()  # stderr is not piped, so _stderr is None
+        return stdout.decode(ENCODING)
+
+    return copy_wl, paste_wl
+
+
 def init_klipper_clipboard():
     def copy_klipper(text):
         text = _stringifyText(text)  # Converts non-str values to str.
@@ -534,7 +551,7 @@ def determine_clipboard():
         return init_windows_clipboard()
 
     if platform.system() == "Linux":
-        if which("wslconfig.exe"):
+        if _executable_exists("wslconfig.exe"):
             return init_wsl_clipboard()
 
     # Setup for the macOS platform:
@@ -549,6 +566,8 @@ def determine_clipboard():
 
     # Setup for the LINUX platform:
     if HAS_DISPLAY:
+        if os.environ.get("WAYLAND_DISPLAY") and _executable_exists("wl-copy"):
+            return init_wl_clipboard()
         if _executable_exists("xsel"):
             return init_xsel_clipboard()
         if _executable_exists("xclip"):
@@ -602,6 +621,7 @@ def set_clipboard(clipboard):
         "qt": init_qt_clipboard,  # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5'
         "xclip": init_xclip_clipboard,
         "xsel": init_xsel_clipboard,
+        "wl-clipboard": init_wl_clipboard,
         "klipper": init_klipper_clipboard,
         "windows": init_windows_clipboard,
         "no": init_no_clipboard,
@@ -671,7 +691,56 @@ def is_available() -> bool:
 copy, paste = lazy_load_stub_copy, lazy_load_stub_paste
 
 
-__all__ = ["copy", "paste", "set_clipboard", "determine_clipboard"]
+def waitForPaste(timeout=None):
+    """This function call blocks until a non-empty text string exists on the
+    clipboard. It returns this text.
+
+    This function raises PyperclipTimeoutException if timeout was set to
+    a number of seconds that has elapsed without non-empty text being put on
+    the clipboard."""
+    startTime = time.monotonic()  # monotonic: immune to wall-clock adjustments
+    while True:
+        clipboardText = paste()
+        if clipboardText != "":
+            return clipboardText
+        time.sleep(0.01)  # poll roughly every 10 ms
+
+        if timeout is not None and time.monotonic() > startTime + timeout:
+            raise PyperclipTimeoutException(
+                "waitForPaste() timed out after " + str(timeout) + " seconds."
+            )
+
+
+def waitForNewPaste(timeout=None):
+    """This function call blocks until a new text string exists on the
+    clipboard that is different from the text that was there when the function
+    was first called. It returns this text.
+
+    This function raises PyperclipTimeoutException if timeout was set to
+    a number of seconds that has elapsed without the clipboard text changing
+    from what it was when the function was first called."""
+    startTime = time.monotonic()  # monotonic: immune to wall-clock adjustments
+    originalText = paste()
+    while True:
+        currentText = paste()
+        if currentText != originalText:
+            return currentText
+        time.sleep(0.01)  # poll roughly every 10 ms
+
+        if timeout is not None and time.monotonic() > startTime + timeout:
+            raise PyperclipTimeoutException(
+                "waitForNewPaste() timed out after " + str(timeout) + " seconds."
+            )
+
+
+__all__ = [
+    "copy",
+    "paste",
+    "waitForPaste",
+    "waitForNewPaste",
+    "set_clipboard",
+    "determine_clipboard",
+]
 
 # pandas aliases
 clipboard_get = paste
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 9ffbfb9f1149f..073115cab8695 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import abc
 from collections.abc import (
     Hashable,
     Iterable,
@@ -160,13 +159,15 @@
     of dtype conversion.
 engine : str, default None
     If io is not a buffer or path, this must be set to identify io.
-    Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
+    Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", "calamine".
     Engine compatibility :
 
     - "xlrd" supports old-style Excel files (.xls).
     - "openpyxl" supports newer Excel file formats.
     - "odf" supports OpenDocument file formats (.odf, .ods, .odt).
     - "pyxlsb" supports Binary Excel files.
+    - "calamine" supports Excel (.xls, .xlsx, .xlsm, .xlsb)
+      and OpenDocument (.ods) file formats.
 
     .. versionchanged:: 1.2.0
         The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
@@ -395,7 +396,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = ...,
     dtype: DtypeArg | None = ...,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ...,
     converters: dict[str, Callable] | dict[int, Callable] | None = ...,
     true_values: Iterable[Hashable] | None = ...,
     false_values: Iterable[Hashable] | None = ...,
@@ -434,7 +435,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = ...,
     dtype: DtypeArg | None = ...,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ...,
     converters: dict[str, Callable] | dict[int, Callable] | None = ...,
     true_values: Iterable[Hashable] | None = ...,
     false_values: Iterable[Hashable] | None = ...,
@@ -473,7 +474,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = None,
     dtype: DtypeArg | None = None,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = None,
     converters: dict[str, Callable] | dict[int, Callable] | None = None,
     true_values: Iterable[Hashable] | None = None,
     false_values: Iterable[Hashable] | None = None,
@@ -549,7 +550,7 @@ def read_excel(
 _WorkbookT = TypeVar("_WorkbookT")
 
 
-class BaseExcelReader(Generic[_WorkbookT], metaclass=abc.ABCMeta):
+class BaseExcelReader(Generic[_WorkbookT]):
     book: _WorkbookT
 
     def __init__(
@@ -589,13 +590,11 @@ def __init__(
             )
 
     @property
-    @abc.abstractmethod
     def _workbook_class(self) -> type[_WorkbookT]:
-        pass
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def load_workbook(self, filepath_or_buffer, engine_kwargs) -> _WorkbookT:
-        pass
+        raise NotImplementedError
 
     def close(self) -> None:
         if hasattr(self, "book"):
@@ -611,21 +610,17 @@ def close(self) -> None:
         self.handles.close()
 
     @property
-    @abc.abstractmethod
     def sheet_names(self) -> list[str]:
-        pass
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def get_sheet_by_name(self, name: str):
-        pass
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def get_sheet_by_index(self, index: int):
-        pass
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def get_sheet_data(self, sheet, rows: int | None = None):
-        pass
+        raise NotImplementedError
 
     def raise_if_bad_sheet_by_index(self, index: int) -> None:
         n_sheets = len(self.sheet_names)
@@ -940,7 +935,7 @@ def parse(
 
 
 @doc(storage_options=_shared_docs["storage_options"])
-class ExcelWriter(Generic[_WorkbookT], metaclass=abc.ABCMeta):
+class ExcelWriter(Generic[_WorkbookT]):
     """
     Class for writing DataFrame objects into excel sheets.
 
@@ -1178,20 +1173,19 @@ def engine(self) -> str:
         return self._engine
 
     @property
-    @abc.abstractmethod
     def sheets(self) -> dict[str, Any]:
         """Mapping of sheet names to sheet objects."""
+        raise NotImplementedError
 
     @property
-    @abc.abstractmethod
     def book(self) -> _WorkbookT:
         """
         Book instance. Class type will depend on the engine used.
 
         This attribute can be used to access engine-specific features.
         """
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def _write_cells(
         self,
         cells,
@@ -1214,12 +1208,13 @@ def _write_cells(
         freeze_panes: int tuple of length 2
             contains the bottom-most row and right-most column to freeze
         """
+        raise NotImplementedError
 
-    @abc.abstractmethod
     def _save(self) -> None:
         """
         Save workbook to disk.
         """
+        raise NotImplementedError
 
     def __init__(
         self,
@@ -1463,13 +1458,15 @@ class ExcelFile:
         .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
     engine : str, default None
         If io is not a buffer or path, this must be set to identify io.
-        Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``
+        Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``, ``calamine``
         Engine compatibility :
 
         - ``xlrd`` supports old-style Excel files (.xls).
         - ``openpyxl`` supports newer Excel file formats.
         - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
         - ``pyxlsb`` supports Binary Excel files.
+        - ``calamine`` supports Excel (.xls, .xlsx, .xlsm, .xlsb)
+          and OpenDocument (.ods) file formats.
 
         .. versionchanged:: 1.2.0
 
@@ -1505,6 +1502,7 @@ class ExcelFile:
     ...     df1 = pd.read_excel(xls, "Sheet1")  # doctest: +SKIP
     """
 
+    from pandas.io.excel._calamine import CalamineReader
     from pandas.io.excel._odfreader import ODFReader
     from pandas.io.excel._openpyxl import OpenpyxlReader
     from pandas.io.excel._pyxlsb import PyxlsbReader
@@ -1515,6 +1513,7 @@ class ExcelFile:
         "openpyxl": OpenpyxlReader,
         "odf": ODFReader,
         "pyxlsb": PyxlsbReader,
+        "calamine": CalamineReader,
     }
 
     def __init__(
diff --git a/pandas/io/excel/_calamine.py b/pandas/io/excel/_calamine.py
new file mode 100644
index 0000000000000..d61a9fc664164
--- /dev/null
+++ b/pandas/io/excel/_calamine.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+from datetime import (
+    date,
+    datetime,
+    time,
+    timedelta,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Union,
+    cast,
+)
+
+from pandas._typing import Scalar
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
+
+import pandas as pd
+from pandas.core.shared_docs import _shared_docs
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from python_calamine import (
+        CalamineSheet,
+        CalamineWorkbook,
+    )
+
+    from pandas._typing import (
+        FilePath,
+        ReadBuffer,
+        StorageOptions,
+    )
+
+_CellValueT = Union[int, float, str, bool, time, date, datetime, timedelta]
+
+
+class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
+    @doc(storage_options=_shared_docs["storage_options"])
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Reader using calamine engine (xlsx/xls/xlsb/ods).
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path to be parsed or
+            an open readable stream.
+        {storage_options}
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("python_calamine")  # checked before any I/O
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[CalamineWorkbook]:
+        from python_calamine import CalamineWorkbook  # lazy: optional dependency
+
+        return CalamineWorkbook
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
+    ) -> CalamineWorkbook:
+        from python_calamine import load_workbook
+
+        return load_workbook(
+            filepath_or_buffer, **engine_kwargs  # type: ignore[arg-type]
+        )
+
+    @property
+    def sheet_names(self) -> list[str]:
+        from python_calamine import SheetTypeEnum
+
+        return [
+            sheet.name
+            for sheet in self.book.sheets_metadata
+            if sheet.typ == SheetTypeEnum.WorkSheet  # other sheet types are dropped
+        ]
+
+    def get_sheet_by_name(self, name: str) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.get_sheet_by_name(name)
+
+    def get_sheet_by_index(self, index: int) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_index(index)
+        return self.book.get_sheet_by_index(index)
+
+    def get_sheet_data(
+        self, sheet: CalamineSheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar]]:
+        def _convert_cell(value: _CellValueT) -> Scalar:
+            if isinstance(value, float):  # integral floats (e.g. 1.0) become int
+                val = int(value)  # NOTE(review): raises on nan/inf - confirm unreachable
+                if val == value:
+                    return val
+                else:
+                    return value
+            elif isinstance(value, date):  # also matches datetime (a date subclass)
+                return pd.Timestamp(value)
+            elif isinstance(value, timedelta):
+                return pd.Timedelta(value)
+            elif isinstance(value, time):
+                # cast needed here because Scalar doesn't include datetime.time
+                return cast(Scalar, value)
+
+            return value  # int, str and bool cells pass through unchanged
+
+        rows: list[list[_CellValueT]] = sheet.to_python(skip_empty_area=False)
+        data: list[list[Scalar]] = []
+
+        for row in rows:
+            data.append([_convert_cell(cell) for cell in row])
+            if file_rows_needed is not None and len(data) >= file_rows_needed:
+                break  # enough rows converted; the remaining rows are ignored
+
+        return data
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index 74cbe90acdae8..bc7dca2d95b6b 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -192,7 +192,15 @@ def _make_table_cell(self, cell) -> tuple[object, Any]:
         if isinstance(val, bool):
             value = str(val).lower()
             pvalue = str(val).upper()
-        if isinstance(val, datetime.datetime):
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="boolean",
+                    booleanvalue=value,
+                    attributes=attributes,
+                ),
+            )
+        elif isinstance(val, datetime.datetime):
             # Fast formatting
             value = val.isoformat()
             # Slow but locale-dependent
@@ -210,17 +218,20 @@ def _make_table_cell(self, cell) -> tuple[object, Any]:
                 pvalue,
                 TableCell(valuetype="date", datevalue=value, attributes=attributes),
             )
+        elif isinstance(val, str):
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="string",
+                    stringvalue=value,
+                    attributes=attributes,
+                ),
+            )
         else:
-            class_to_cell_type = {
-                str: "string",
-                int: "float",
-                float: "float",
-                bool: "boolean",
-            }
             return (
                 pvalue,
                 TableCell(
-                    valuetype=class_to_cell_type[type(val)],
+                    valuetype="float",
                     value=value,
                     attributes=attributes,
                 ),
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index 9970d465ced9d..b344d9849f16c 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -941,9 +941,7 @@ def write(
         if isinstance(writer, ExcelWriter):
             need_save = False
         else:
-            # error: Cannot instantiate abstract class 'ExcelWriter' with abstract
-            # attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
-            writer = ExcelWriter(  # type: ignore[abstract]
+            writer = ExcelWriter(
                 writer,
                 engine=engine,
                 storage_options=storage_options,
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 10701be4f7e0b..68d30fe5ba681 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -1033,7 +1033,7 @@ def read_html(
     io: FilePath | ReadBuffer[str],
     *,
     match: str | Pattern = ".+",
-    flavor: str | None = None,
+    flavor: str | Sequence[str] | None = None,
     header: int | Sequence[int] | None = None,
     index_col: int | Sequence[int] | None = None,
     skiprows: int | Sequence[int] | slice | None = None,
@@ -1074,11 +1074,11 @@ def read_html(
         This value is converted to a regular expression so that there is
         consistent behavior between Beautiful Soup and lxml.
 
-    flavor : str, optional
-        The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
-        each other, they are both there for backwards compatibility. The
-        default of ``None`` tries to use ``lxml`` to parse and if that fails it
-        falls back on ``bs4`` + ``html5lib``.
+    flavor : str or list-like, optional
+        The parsing engine (or list of parsing engines) to use. 'bs4' and
+        'html5lib' are synonymous with each other, they are both there for
+        backwards compatibility. The default of ``None`` tries to use ``lxml``
+        to parse and if that fails it falls back on ``bs4`` + ``html5lib``.
 
     header : int or list-like, optional
         The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 833f4986b6da6..52ea072d1483f 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -82,6 +82,7 @@
         JSONEngine,
         JSONSerializable,
         ReadBuffer,
+        Self,
         StorageOptions,
         WriteBuffer,
     )
@@ -1056,7 +1057,7 @@ def close(self) -> None:
         if self.handles is not None:
             self.handles.close()
 
-    def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]:
+    def __iter__(self) -> Self:
         return self
 
     @overload
@@ -1099,7 +1100,7 @@ def __next__(self) -> DataFrame | Series:
         else:
             return obj
 
-    def __enter__(self) -> JsonReader[FrameSeriesStrT]:
+    def __enter__(self) -> Self:
         return self
 
     def __exit__(
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 6846ea2b196b8..43fb4ec3b55fc 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -13,7 +13,6 @@
 import csv
 from io import StringIO
 import re
-import sys
 from typing import (
     IO,
     TYPE_CHECKING,
@@ -21,6 +20,7 @@
     Literal,
     cast,
 )
+import warnings
 
 import numpy as np
 
@@ -28,8 +28,10 @@
 from pandas.errors import (
     EmptyDataError,
     ParserError,
+    ParserWarning,
 )
 from pandas.util._decorators import cache_readonly
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     is_bool_dtype,
@@ -778,8 +780,11 @@ def _alert_malformed(self, msg: str, row_num: int) -> None:
         if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
             raise ParserError(msg)
         if self.on_bad_lines == self.BadLineHandleMethod.WARN:
-            base = f"Skipping line {row_num}: "
-            sys.stderr.write(base + msg + "\n")
+            warnings.warn(
+                f"Skipping line {row_num}: {msg}\n",
+                ParserWarning,
+                stacklevel=find_stack_level(),
+            )
 
     def _next_iter_line(self, row_num: int) -> list[Scalar] | None:
         """
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 10d3ab230cb9d..e826aad478059 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -638,7 +638,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -697,7 +700,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -757,7 +763,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -817,7 +826,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -888,7 +900,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = "infer",
     names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
     index_col: IndexLabel | Literal[False] | None = None,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = None,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = None,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,
@@ -983,7 +998,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1040,7 +1058,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1097,7 +1118,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1154,7 +1178,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1224,7 +1251,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = "infer",
     names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
     index_col: IndexLabel | Literal[False] | None = None,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = None,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = None,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,
@@ -1307,6 +1337,51 @@ def read_table(
     return _read(filepath_or_buffer, kwds)
 
 
+@overload
+def read_fwf(
+    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    *,
+    colspecs: Sequence[tuple[int, int]] | str | None = ...,
+    widths: Sequence[int] | None = ...,
+    infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | lib.NoDefault = ...,
+    iterator: Literal[True],  # iterator=True always types as a reader
+    chunksize: int | None = ...,
+    **kwds,
+) -> TextFileReader:
+    ...
+
+
+@overload
+def read_fwf(
+    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    *,
+    colspecs: Sequence[tuple[int, int]] | str | None = ...,
+    widths: Sequence[int] | None = ...,
+    infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | lib.NoDefault = ...,
+    iterator: bool = ...,
+    chunksize: int,  # an explicit int chunksize always types as a reader
+    **kwds,
+) -> TextFileReader:
+    ...
+
+
+@overload
+def read_fwf(
+    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    *,
+    colspecs: Sequence[tuple[int, int]] | str | None = ...,
+    widths: Sequence[int] | None = ...,
+    infer_nrows: int = ...,
+    dtype_backend: DtypeBackend | lib.NoDefault = ...,
+    iterator: Literal[False] = ...,
+    chunksize: None = ...,  # no iterator and no chunksize: types as a DataFrame
+    **kwds,
+) -> DataFrame:
+    ...
+
+
 def read_fwf(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
     *,
@@ -1314,6 +1389,8 @@ def read_fwf(
     widths: Sequence[int] | None = None,
     infer_nrows: int = 100,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    iterator: bool = False,
+    chunksize: int | None = None,
     **kwds,
 ) -> DataFrame | TextFileReader:
     r"""
@@ -1412,6 +1489,8 @@ def read_fwf(
     kwds["colspecs"] = colspecs
     kwds["infer_nrows"] = infer_nrows
     kwds["engine"] = "python-fwf"
+    kwds["iterator"] = iterator
+    kwds["chunksize"] = chunksize
 
     check_dtype_backend(dtype_backend)
     kwds["dtype_backend"] = dtype_backend
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 7669d5aa4cea5..0788d9da06eb9 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -138,7 +138,7 @@ def _parse_date_columns(data_frame, parse_dates):
         if isinstance(df_col.dtype, DatetimeTZDtype) or col_name in parse_dates:
             try:
                 fmt = parse_dates[col_name]
-            except TypeError:
+            except (KeyError, TypeError):
                 fmt = None
             data_frame.isetitem(i, _handle_date_column(df_col, format=fmt))
 
@@ -2091,13 +2091,11 @@ def _adapt_time(t) -> str:
 
         adapt_date_iso = lambda val: val.isoformat()
         adapt_datetime_iso = lambda val: val.isoformat()
-        adapt_datetime_epoch = lambda val: int(val.timestamp())
 
         sqlite3.register_adapter(time, _adapt_time)
 
         sqlite3.register_adapter(date, adapt_date_iso)
         sqlite3.register_adapter(datetime, adapt_datetime_iso)
-        sqlite3.register_adapter(datetime, adapt_datetime_epoch)
 
         convert_date = lambda val: date.fromisoformat(val.decode())
         convert_datetime = lambda val: datetime.fromisoformat(val.decode())
diff --git a/pandas/meson.build b/pandas/meson.build
index 1dc9955aa4ff6..f02258c98d46a 100644
--- a/pandas/meson.build
+++ b/pandas/meson.build
@@ -40,8 +40,9 @@ subdirs_list = [
     'util'
 ]
 foreach subdir: subdirs_list
-    install_subdir(subdir, install_dir: py.get_install_dir(pure: false) / 'pandas')
+    install_subdir(subdir, install_dir: py.get_install_dir() / 'pandas')
 endforeach
+
 top_level_py_list = [
     '__init__.py',
     '_typing.py',
@@ -49,8 +50,4 @@ top_level_py_list = [
     'conftest.py',
     'testing.py'
 ]
-foreach file: top_level_py_list
-    py.install_sources(file,
-                       pure: false,
-                       subdir: 'pandas')
-endforeach
+py.install_sources(top_level_py_list, subdir: 'pandas')
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3a3f73a68374b..227b72573f979 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -18,6 +18,13 @@
 from pandas.tests.frame.common import zip_frames
 
 
+@pytest.fixture(params=["python", "numba"])
+def engine(request):
+    if request.param == "numba":
+        pytest.importorskip("numba")  # skip the numba case if numba is missing
+    return request.param  # the engine name, passed on to DataFrame.apply
+
+
 def test_apply(float_frame):
     with np.errstate(all="ignore"):
         # ufunc
@@ -38,8 +45,9 @@ def test_apply(float_frame):
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_args(float_frame, axis):
-    result = float_frame.apply(lambda x, y: x + y, axis, args=(1,))
+@pytest.mark.parametrize("raw", [True, False])
+def test_apply_args(float_frame, axis, raw):
+    result = float_frame.apply(lambda x, y: x + y, axis, args=(1,), raw=raw)
     expected = float_frame + 1
     tm.assert_frame_equal(result, expected)
 
@@ -234,36 +242,42 @@ def test_apply_broadcast_series_lambda_func(int_frame_const_col):
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_float_frame(float_frame, axis):
+def test_apply_raw_float_frame(float_frame, axis, engine):
+    if engine == "numba":
+        pytest.skip("numba can't handle when UDF returns None.")
+
     def _assert_raw(x):
         assert isinstance(x, np.ndarray)
         assert x.ndim == 1
 
-    float_frame.apply(_assert_raw, axis=axis, raw=True)
+    float_frame.apply(_assert_raw, axis=axis, engine=engine, raw=True)
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_float_frame_lambda(float_frame, axis):
-    result = float_frame.apply(np.mean, axis=axis, raw=True)
+def test_apply_raw_float_frame_lambda(float_frame, axis, engine):
+    result = float_frame.apply(np.mean, axis=axis, engine=engine, raw=True)
     expected = float_frame.apply(lambda x: x.values.mean(), axis=axis)
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_raw_float_frame_no_reduction(float_frame):
+def test_apply_raw_float_frame_no_reduction(float_frame, engine):
     # no reduction
-    result = float_frame.apply(lambda x: x * 2, raw=True)
+    result = float_frame.apply(lambda x: x * 2, engine=engine, raw=True)
     expected = float_frame * 2
     tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_mixed_type_frame(mixed_type_frame, axis):
+def test_apply_raw_mixed_type_frame(mixed_type_frame, axis, engine):
+    if engine == "numba":
+        pytest.skip("isinstance check doesn't work with numba")
+
     def _assert_raw(x):
         assert isinstance(x, np.ndarray)
         assert x.ndim == 1
 
     # Mixed dtype (GH-32423)
-    mixed_type_frame.apply(_assert_raw, axis=axis, raw=True)
+    mixed_type_frame.apply(_assert_raw, axis=axis, engine=engine, raw=True)
 
 
 def test_apply_axis1(float_frame):
@@ -300,14 +314,20 @@ def test_apply_mixed_dtype_corner_indexing():
 )
 @pytest.mark.parametrize("raw", [True, False])
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_empty_infer_type(ax, func, raw, axis):
+def test_apply_empty_infer_type(ax, func, raw, axis, engine, request):
     df = DataFrame(**{ax: ["a", "b", "c"]})
 
     with np.errstate(all="ignore"):
         test_res = func(np.array([], dtype="f8"))
         is_reduction = not isinstance(test_res, np.ndarray)
 
-        result = df.apply(func, axis=axis, raw=raw)
+        if engine == "numba" and raw is False:
+            mark = pytest.mark.xfail(
+                reason="numba engine only supports raw=True at the moment"
+            )
+            request.node.add_marker(mark)
+
+        result = df.apply(func, axis=axis, engine=engine, raw=raw)
         if is_reduction:
             agg_axis = df._get_agg_axis(axis)
             assert isinstance(result, Series)
@@ -607,8 +627,10 @@ def non_reducing_function(row):
         assert names == list(df.index)
 
 
-def test_apply_raw_function_runs_once():
+def test_apply_raw_function_runs_once(engine):
     # https://github.com/pandas-dev/pandas/issues/34506
+    if engine == "numba":
+        pytest.skip("appending to list outside of numba func is not supported")
 
     df = DataFrame({"a": [1, 2, 3]})
     values = []  # Save row values function is applied to
@@ -623,7 +645,7 @@ def non_reducing_function(row):
     for func in [reducing_function, non_reducing_function]:
         del values[:]
 
-        df.apply(func, raw=True, axis=1)
+        df.apply(func, engine=engine, raw=True, axis=1)
         assert values == list(df.a.to_list())
 
 
@@ -1449,10 +1471,12 @@ def test_apply_no_suffix_index():
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_raw_returns_string():
+def test_apply_raw_returns_string(engine):
     # https://github.com/pandas-dev/pandas/issues/35940
+    if engine == "numba":
+        pytest.skip("No object dtype support in numba")
     df = DataFrame({"A": ["aa", "bbb"]})
-    result = df.apply(lambda x: x[0], axis=1, raw=True)
+    result = df.apply(lambda x: x[0], engine=engine, axis=1, raw=True)
     expected = Series(["aa", "bbb"])
     tm.assert_series_equal(result, expected)
 
@@ -1632,3 +1656,14 @@ def test_agg_dist_like_and_nonunique_columns():
     result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)
+
+
+def test_numba_unsupported():
+    # raw=False combined with engine="numba" must raise ValueError
+    msg = "The numba engine in DataFrame.apply can only be used when raw=True"
+    frame = DataFrame(
+        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
+    )
+
+    with pytest.raises(ValueError, match=msg):
+        frame.apply(lambda x: x, engine="numba", raw=False)
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 2c96d874fb3d4..c0ff0a77f33a7 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -377,8 +377,16 @@ def test_astype_int(dtype):
     tm.assert_numpy_array_equal(result, expected)
 
     arr = pd.array(["1", pd.NA, "3"], dtype=dtype)
-    msg = r"int\(\) argument must be a string, a bytes-like object or a( real)? number"
-    with pytest.raises(TypeError, match=msg):
+    if dtype.storage == "pyarrow_numpy":
+        err = ValueError
+        msg = "cannot convert float NaN to integer"
+    else:
+        err = TypeError
+        msg = (
+            r"int\(\) argument must be a string, a bytes-like "
+            r"object or a( real)? number"
+        )
+    with pytest.raises(err, match=msg):
         arr.astype("int64")
 
 
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index 1ab628f186b47..c1d424f12bfc4 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -12,7 +12,10 @@
     StringArray,
     StringDtype,
 )
-from pandas.core.arrays.string_arrow import ArrowStringArray
+from pandas.core.arrays.string_arrow import (
+    ArrowStringArray,
+    ArrowStringArrayNumpySemantics,
+)
 
 skip_if_no_pyarrow = pytest.mark.skipif(
     pa_version_under7p0,
@@ -166,6 +169,9 @@ def test_pyarrow_not_installed_raises():
     with pytest.raises(ImportError, match=msg):
         ArrowStringArray([])
 
+    with pytest.raises(ImportError, match=msg):
+        ArrowStringArrayNumpySemantics([])
+
     with pytest.raises(ImportError, match=msg):
         ArrowStringArray._from_sequence(["a", None, "b"])
 
@@ -235,9 +241,10 @@ def test_setitem_invalid_indexer_raises():
 
 
 @skip_if_no_pyarrow
-def test_pickle_roundtrip():
+@pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"])
+def test_pickle_roundtrip(dtype):
     # GH 42600
-    expected = pd.Series(range(10), dtype="string[pyarrow]")
+    expected = pd.Series(range(10), dtype=dtype)
     expected_sliced = expected.head(2)
     full_pickled = pickle.dumps(expected)
     sliced_pickled = pickle.dumps(expected_sliced)
@@ -249,3 +256,11 @@ def test_pickle_roundtrip():
 
     result_sliced = pickle.loads(sliced_pickled)
     tm.assert_series_equal(result_sliced, expected_sliced)
+
+
+@skip_if_no_pyarrow
+def test_string_dtype_error_message():
+    # GH#55051: an unknown storage must raise and name the valid storages
+    msg = "Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'."
+    with pytest.raises(ValueError, match=msg):
+        StringDtype("bla")
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index 489f43729a004..5c21c4f7137a5 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -108,9 +108,13 @@ def test_groupby_extension_transform(self, data_for_grouping):
 
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
-        df.groupby("B", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("B", group_keys=False).apply(groupby_apply_op)
         df.groupby("B", group_keys=False).A.apply(groupby_apply_op)
-        df.groupby("A", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("A", group_keys=False).apply(groupby_apply_op)
         df.groupby("A", group_keys=False).B.apply(groupby_apply_op)
 
     def test_groupby_apply_identity(self, data_for_grouping):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 13d80329f4d51..8968b9a7f25fe 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -31,6 +31,7 @@
 import pytest
 
 from pandas._libs import lib
+from pandas._libs.tslibs import timezones
 from pandas.compat import (
     PY311,
     is_ci_environment,
@@ -40,6 +41,7 @@
     pa_version_under9p0,
     pa_version_under11p0,
     pa_version_under13p0,
+    pa_version_under14p0,
 )
 
 from pandas.core.dtypes.dtypes import (
@@ -917,7 +919,7 @@ def _is_temporal_supported(self, opname, pa_dtype):
                 or (
                     opname
                     in ("__truediv__", "__rtruediv__", "__floordiv__", "__rfloordiv__")
-                    and not pa_version_under13p0
+                    and not pa_version_under14p0
                 )
             )
             and pa.types.is_duration(pa_dtype)
@@ -1595,6 +1597,19 @@ def test_to_numpy_null_array_no_dtype():
     tm.assert_numpy_array_equal(result, expected)
 
 
+def test_to_numpy_without_dtype():
+    # GH 54808: filling NA via na_value (no dtype passed) yields a plain
+    # numpy bool_ / float32 array
+    bool_arr = pd.array([True, pd.NA], dtype="boolean[pyarrow]")
+    tm.assert_numpy_array_equal(
+        bool_arr.to_numpy(na_value=False), np.array([True, False], dtype=np.bool_)
+    )
+    float_arr = pd.array([1.0, pd.NA], dtype="float32[pyarrow]")
+    tm.assert_numpy_array_equal(
+        float_arr.to_numpy(na_value=0.0), np.array([1.0, 0.0], dtype=np.float32)
+    )
+
+
 def test_setitem_null_slice(data):
     # GH50248
     orig = data.copy()
@@ -2418,7 +2433,7 @@ def test_dt_tz(tz):
         dtype=ArrowDtype(pa.timestamp("ns", tz=tz)),
     )
     result = ser.dt.tz
-    assert result == tz
+    assert result == timezones.maybe_get_tz(tz)
 
 
 def test_dt_isocalendar():
@@ -2979,6 +2994,15 @@ def test_groupby_count_return_arrow_dtype(data_missing):
     tm.assert_frame_equal(result, expected)
 
 
+def test_fixed_size_list():
+    # GH#55000: ArrowDtype.type of a fixed-size list type is the builtin list
+    fixed_size_type = pa.list_(pa.int64(), list_size=2)
+    dtype = ArrowDtype(fixed_size_type)
+    ser = pd.Series([[1, 2], [3, 4]], dtype=dtype)
+
+    assert ser.dtype.type == list
+
+
 def test_arrowextensiondtype_dataframe_repr():
     # GH 54062
     df = pd.DataFrame(
diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py
index 95fcaaa473067..e7901ed363106 100644
--- a/pandas/tests/frame/methods/test_copy.py
+++ b/pandas/tests/frame/methods/test_copy.py
@@ -56,7 +56,7 @@ def test_copy_consolidates(self):
             }
         )
 
-        for i in range(0, 10):
+        for i in range(10):
             df.loc[:, f"n_{i}"] = np.random.default_rng(2).integers(0, 100, size=55)
 
         assert len(df._mgr.blocks) == 11
diff --git a/pandas/tests/frame/methods/test_filter.py b/pandas/tests/frame/methods/test_filter.py
index 1a2fbf8a65a55..9d5e6876bb08c 100644
--- a/pandas/tests/frame/methods/test_filter.py
+++ b/pandas/tests/frame/methods/test_filter.py
@@ -137,3 +137,17 @@ def test_filter_regex_non_string(self):
         result = df.filter(regex="STRING")
         expected = df[["STRING"]]
         tm.assert_frame_equal(result, expected)
+
+    def test_filter_keep_order(self):
+        # GH#54980: filter returns columns in the order given by ``items``
+        wanted = ["B", "A"]
+        frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+
+        tm.assert_frame_equal(frame.filter(items=wanted), frame[wanted])
+
+    def test_filter_different_dtype(self):
+        # GH#54980: string items never match integer column labels -> no columns
+        frame = DataFrame({1: [1, 2, 3], 2: [4, 5, 6]})
+        no_columns = frame[[]]
+
+        tm.assert_frame_equal(frame.filter(items=["B", "A"]), no_columns)
diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py
index d0153da038a75..ede212ae18ae9 100644
--- a/pandas/tests/frame/methods/test_pct_change.py
+++ b/pandas/tests/frame/methods/test_pct_change.py
@@ -160,3 +160,21 @@ def test_pct_change_with_duplicated_indices(fill_method):
         index=["a", "b"] * 3,
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_pct_change_none_beginning_no_warning():
+    # GH#54481
+    # Second column starts with a missing value.
+    df = DataFrame(
+        [[1, None], [2, 1], [3, 2], [4, 3], [5, 4]],
+    )
+    # The test name promises "no warning": assert it explicitly rather than
+    # depending on how the test run happens to be configured.
+    with tm.assert_produces_warning(None):
+        result = df.pct_change()
+
+    # NaN where there is no previous value (first row, and after the None).
+    expected = DataFrame(
+        {0: [np.nan, 1, 0.5, 1 / 3, 0.25], 1: [np.nan, np.nan, 1, 0.5, 1 / 3]}
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index 0858e33a989b7..56bdd2fc664cc 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -26,7 +26,7 @@
     isna,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
+from pandas.api.types import CategoricalDtype
 
 
 class TestReindexSetIndex:
@@ -1082,7 +1082,9 @@ def test_reindex_with_categoricalindex(self):
             {
                 "A": np.arange(3, dtype="int64"),
             },
-            index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"),
+            index=CategoricalIndex(
+                list("abc"), dtype=CategoricalDtype(list("cabe")), name="B"
+            ),
         )
 
         # reindexing
@@ -1111,13 +1113,13 @@ def test_reindex_with_categoricalindex(self):
 
         result = df.reindex(Categorical(["a", "e"], categories=cats))
         expected = DataFrame(
-            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
+            {"A": [0, np.nan], "B": Series(list("ae")).astype(CategoricalDtype(cats))}
         ).set_index("B")
         tm.assert_frame_equal(result, expected, check_index_type=True)
 
         result = df.reindex(Categorical(["a"], categories=cats))
         expected = DataFrame(
-            {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
+            {"A": [0], "B": Series(list("a")).astype(CategoricalDtype(cats))}
         ).set_index("B")
         tm.assert_frame_equal(result, expected, check_index_type=True)
 
@@ -1138,13 +1140,19 @@ def test_reindex_with_categoricalindex(self):
         # give back the type of categorical that we received
         result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True))
         expected = DataFrame(
-            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
+            {
+                "A": [0, np.nan],
+                "B": Series(list("ae")).astype(CategoricalDtype(cats, ordered=True)),
+            }
         ).set_index("B")
         tm.assert_frame_equal(result, expected, check_index_type=True)
 
         result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
         expected = DataFrame(
-            {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
+            {
+                "A": [0, np.nan],
+                "B": Series(list("ad")).astype(CategoricalDtype(["a", "d"])),
+            }
         ).set_index("B")
         tm.assert_frame_equal(result, expected, check_index_type=True)
 
@@ -1152,7 +1160,9 @@ def test_reindex_with_categoricalindex(self):
             {
                 "A": np.arange(6, dtype="int64"),
             },
-            index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
+            index=CategoricalIndex(
+                list("aabbca"), dtype=CategoricalDtype(list("cabe")), name="B"
+            ),
         )
         # passed duplicate indexers are not allowed
         msg = "cannot reindex on an axis with duplicate labels"
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index d99dd36f3a2e3..339e19254fd10 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -788,15 +788,15 @@ def test_errorreset_index_rename(float_frame):
 
 
 def test_reset_index_false_index_name():
-    result_series = Series(data=range(5, 10), index=range(0, 5))
+    result_series = Series(data=range(5, 10), index=range(5))
     result_series.index.name = False
     result_series.reset_index()
-    expected_series = Series(range(5, 10), RangeIndex(range(0, 5), name=False))
+    expected_series = Series(range(5, 10), RangeIndex(range(5), name=False))
     tm.assert_series_equal(result_series, expected_series)
 
     # GH 38147
-    result_frame = DataFrame(data=range(5, 10), index=range(0, 5))
+    result_frame = DataFrame(data=range(5, 10), index=range(5))
     result_frame.index.name = False
     result_frame.reset_index()
-    expected_frame = DataFrame(range(5, 10), RangeIndex(range(0, 5), name=False))
+    expected_frame = DataFrame(range(5, 10), RangeIndex(range(5), name=False))
     tm.assert_frame_equal(result_frame, expected_frame)
diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
index 228b62a418813..985a9e3602410 100644
--- a/pandas/tests/frame/methods/test_sort_index.py
+++ b/pandas/tests/frame/methods/test_sort_index.py
@@ -911,7 +911,7 @@ def test_sort_index_multiindex_sparse_column(self):
         expected = DataFrame(
             {
                 i: pd.array([0.0, 0.0, 0.0, 0.0], dtype=pd.SparseDtype("float64", 0.0))
-                for i in range(0, 4)
+                for i in range(4)
             },
             index=MultiIndex.from_product([[1, 2], [1, 2]]),
         )
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
index 7bb9518f9b0f9..61f0ad30b4519 100644
--- a/pandas/tests/frame/methods/test_to_dict.py
+++ b/pandas/tests/frame/methods/test_to_dict.py
@@ -166,6 +166,21 @@ def test_to_dict_not_unique_warning(self):
         with tm.assert_produces_warning(UserWarning):
             df.to_dict()
 
+    @pytest.mark.filterwarnings("ignore::UserWarning")
+    @pytest.mark.parametrize(
+        "orient,expected",
+        [
+            ("list", {"A": [2, 5], "B": [3, 6]}),
+            ("dict", {"A": {0: 2, 1: 5}, "B": {0: 3, 1: 6}}),
+        ],
+    )
+    def test_to_dict_not_unique(self, orient, expected):
+        # GH#54824: with duplicated column labels every orient must behave the
+        # same way; the expected dicts hold the values of the last "A" column
+        rows = [[1, 2, 3], [4, 5, 6]]
+        frame = DataFrame(rows, columns=["A", "A", "B"])
+        assert frame.to_dict(orient) == expected
+
     # orient - orient argument to to_dict function
     # item_getter - function for extracting value from
     # the resulting dict using column name and index
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 19fcddb5dbe2b..ed3fb079d745a 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -692,12 +692,12 @@ def test_constructor_error_msgs(self):
         arr = np.array([[4, 5, 6]])
         msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)"
         with pytest.raises(ValueError, match=msg):
-            DataFrame(index=[0], columns=range(0, 4), data=arr)
+            DataFrame(index=[0], columns=range(4), data=arr)
 
         arr = np.array([4, 5, 6])
         msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)"
         with pytest.raises(ValueError, match=msg):
-            DataFrame(index=[0], columns=range(0, 4), data=arr)
+            DataFrame(index=[0], columns=range(4), data=arr)
 
         # higher dim raise exception
         with pytest.raises(ValueError, match="Must pass 2-d input"):
@@ -2393,7 +2393,7 @@ def test_construct_with_two_categoricalindex_series(self):
 
     def test_constructor_series_nonexact_categoricalindex(self):
         # GH 42424
-        ser = Series(range(0, 100))
+        ser = Series(range(100))
         ser1 = cut(ser, 10).value_counts().head(5)
         ser2 = cut(ser, 10).value_counts().tail(5)
         result = DataFrame({"1": ser1, "2": ser2})
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index c90b871d5d66f..b54a795af4fdc 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -1767,7 +1767,9 @@ def test_unstack_bug(self, future_stack):
             }
         )
 
-        result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
 
         unstacked = result.unstack()
         restacked = unstacked.stack(future_stack=future_stack)
@@ -2508,3 +2510,19 @@ def test_unstack_mixed_level_names(self):
             index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
         )
         tm.assert_frame_equal(result, expected)
+
+
+def test_stack_tuple_columns(future_stack):
+    # GH#54948 - stacking a flat (non-MultiIndex) column Index that holds tuples
+    tuple_labels = [("a", 1), ("a", 2), ("b", 1)]
+    frame = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=tuple_labels)
+    stacked = frame.stack(future_stack=future_stack)
+
+    expected = Series(
+        list(range(1, 10)),
+        index=MultiIndex(
+            levels=[[0, 1, 2], tuple_labels],
+            codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+        ),
+    )
+    tm.assert_series_equal(stacked, expected)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index c01ca4922a84b..882f42ff18bdd 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -515,6 +515,18 @@ def test_groupby_agg_dict_with_getitem():
     tm.assert_frame_equal(result, expected)
 
 
+def test_groupby_agg_dict_dup_columns():
+    # GH#55006: dict-agg on "b" must work although label "c" is duplicated
+    rows = [[1, 2, 3, 4], [1, 3, 4, 5], [2, 4, 5, 6]]
+    frame = DataFrame(rows, columns=["a", "b", "c", "c"])
+
+    summed = frame.groupby("a").agg({"b": "sum"})
+
+    group_keys = Index([1, 2], name="a")
+    expected = DataFrame({"b": [5, 4]}, index=group_keys)
+    tm.assert_frame_equal(summed, expected)
+
+
 @pytest.mark.parametrize(
     "op",
     [
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 9d3ebbd3672ae..7ea107f254104 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -499,13 +499,17 @@ def test_agg_timezone_round_trip():
     assert ts == grouped.first()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
 
 
 def test_sum_uint64_overflow():
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index d04ee7cec0db1..abcb9f68e0f5c 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -28,7 +28,9 @@ def test_apply_func_that_appends_group_to_list_without_copy():
     def store(group):
         groups.append(group)
 
-    df.groupby("index").apply(store)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby("index").apply(store)
     expected_value = DataFrame(
         {"index": [0] * 10, 0: [1] * 10}, index=pd.RangeIndex(0, 100, 10)
     )
@@ -71,9 +73,11 @@ def test_apply_issues():
         ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date"
     )
     expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
-    result = df.groupby("date", group_keys=False).apply(
-        lambda x: x["time"][x["value"].idxmax()]
-    )
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("date", group_keys=False).apply(
+            lambda x: x["time"][x["value"].idxmax()]
+        )
     tm.assert_series_equal(result, expected)
 
 
@@ -179,7 +183,9 @@ def f_constant_df(group):
     for func in [f_copy, f_nocopy, f_scalar, f_none, f_constant_df]:
         del names[:]
 
-        df.groupby("a", group_keys=False).apply(func)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("a", group_keys=False).apply(func)
         assert names == group_names
 
 
@@ -197,9 +203,11 @@ def test_group_apply_once_per_group2(capsys):
         index=["0", "2", "4", "6", "8", "10", "12", "14"],
     )
 
-    df.groupby("group_by_column", group_keys=False).apply(
-        lambda df: print("function_called")
-    )
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby("group_by_column", group_keys=False).apply(
+            lambda df: print("function_called")
+        )
 
     result = capsys.readouterr().out.count("function_called")
     # If `groupby` behaves unexpectedly, this test will break
@@ -219,8 +227,11 @@ def slow(group):
     def fast(group):
         return group.copy()
 
-    fast_df = df.groupby("A", group_keys=False).apply(fast)
-    slow_df = df.groupby("A", group_keys=False).apply(slow)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        fast_df = df.groupby("A", group_keys=False).apply(fast)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        slow_df = df.groupby("A", group_keys=False).apply(slow)
 
     tm.assert_frame_equal(fast_df, slow_df)
 
@@ -242,7 +253,9 @@ def test_groupby_apply_identity_maybecopy_index_identical(func):
 
     df = DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
 
-    result = df.groupby("g", group_keys=False).apply(func)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("g", group_keys=False).apply(func)
     tm.assert_frame_equal(result, df)
 
 
@@ -285,8 +298,11 @@ def test_groupby_as_index_apply():
     tm.assert_index_equal(res_as, exp)
     tm.assert_index_equal(res_not_as, exp)
 
-    res_as_apply = g_as.apply(lambda x: x.head(2)).index
-    res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        res_as_apply = g_as.apply(lambda x: x.head(2)).index
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
 
     # apply doesn't maintain the original ordering
     # changed in GH5610 as the as_index=False returns a MI here
@@ -299,7 +315,9 @@ def test_groupby_as_index_apply():
 
     ind = Index(list("abcde"))
     df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
-    res = df.groupby(0, as_index=False, group_keys=False).apply(lambda x: x).index
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        res = df.groupby(0, as_index=False, group_keys=False).apply(lambda x: x).index
     tm.assert_index_equal(res, ind)
 
 
@@ -328,13 +346,19 @@ def desc3(group):
         # weirdo
         return result
 
-    result = grouped.apply(desc)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(desc)
     assert result.index.names == ("A", "B", "stat")
 
-    result2 = grouped.apply(desc2)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result2 = grouped.apply(desc2)
     assert result2.index.names == ("A", "B", "stat")
 
-    result3 = grouped.apply(desc3)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result3 = grouped.apply(desc3)
     assert result3.index.names == ("A", "B", None)
 
 
@@ -364,7 +388,9 @@ def test_apply_series_yield_constant(df):
 
 def test_apply_frame_yield_constant(df):
     # GH13568
-    result = df.groupby(["A", "B"]).apply(len)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["A", "B"]).apply(len)
     assert isinstance(result, Series)
     assert result.name is None
 
@@ -375,7 +401,9 @@ def test_apply_frame_yield_constant(df):
 
 def test_apply_frame_to_series(df):
     grouped = df.groupby(["A", "B"])
-    result = grouped.apply(len)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(len)
     expected = grouped.count()["C"]
     tm.assert_index_equal(result.index, expected.index)
     tm.assert_numpy_array_equal(result.values, expected.values)
@@ -384,7 +412,9 @@ def test_apply_frame_to_series(df):
 def test_apply_frame_not_as_index_column_name(df):
     # GH 35964 - path within _wrap_applied_output not hit by a test
     grouped = df.groupby(["A", "B"], as_index=False)
-    result = grouped.apply(len)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(len)
     expected = grouped.count().rename(columns={"C": np.nan}).drop(columns="D")
     # TODO(GH#34306): Use assert_frame_equal when column name is not np.nan
     tm.assert_index_equal(result.index, expected.index)
@@ -407,7 +437,9 @@ def trans2(group):
         }
     )
 
-    result = df.groupby("A").apply(trans)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(trans)
     exp = df.groupby("A")["C"].apply(trans2)
     tm.assert_series_equal(result, exp, check_names=False)
     assert result.name == "C"
@@ -436,7 +468,9 @@ def test_apply_chunk_view(group_keys):
     # Low level tinkering could be unsafe, make sure not
     df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
 
-    result = df.groupby("key", group_keys=group_keys).apply(lambda x: x.iloc[:2])
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("key", group_keys=group_keys).apply(lambda x: x.iloc[:2])
     expected = df.take([0, 1, 3, 4, 6, 7])
     if group_keys:
         expected.index = MultiIndex.from_arrays(
@@ -457,7 +491,9 @@ def test_apply_no_name_column_conflict():
 
     # it works! #2605
     grouped = df.groupby(["name", "name2"])
-    grouped.apply(lambda x: x.sort_values("value", inplace=True))
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grouped.apply(lambda x: x.sort_values("value", inplace=True))
 
 
 def test_apply_typecast_fail():
@@ -474,7 +510,9 @@ def f(group):
         group["v2"] = (v - v.min()) / (v.max() - v.min())
         return group
 
-    result = df.groupby("d", group_keys=False).apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("d", group_keys=False).apply(f)
 
     expected = df.copy()
     expected["v2"] = np.tile([0.0, 0.5, 1], 2)
@@ -498,7 +536,9 @@ def f(group):
         group["v2"] = (v - v.min()) / (v.max() - v.min())
         return group
 
-    result = df.groupby("d", group_keys=False).apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("d", group_keys=False).apply(f)
 
     expected = df.copy()
     expected["v2"] = np.tile([0.0, 0.5, 1], 2)
@@ -536,8 +576,11 @@ def filt2(x):
         else:
             return x[x.category == "c"]
 
-    expected = data.groupby("id_field").apply(filt1)
-    result = data.groupby("id_field").apply(filt2)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = data.groupby("id_field").apply(filt1)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = data.groupby("id_field").apply(filt2)
     tm.assert_frame_equal(result, expected)
 
 
@@ -556,7 +599,9 @@ def test_apply_with_duplicated_non_sorted_axis(test_series):
         expected = ser.sort_index()
         tm.assert_series_equal(result, expected)
     else:
-        result = df.groupby("Y", group_keys=False).apply(lambda x: x)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby("Y", group_keys=False).apply(lambda x: x)
 
         # not expecting the order to remain the same for duplicated axis
         result = result.sort_values("Y")
@@ -601,7 +646,9 @@ def f(g):
         g["value3"] = g["value1"] * 2
         return g
 
-    result = grouped.apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(f)
     assert "value3" in result
 
 
@@ -615,9 +662,13 @@ def test_apply_numeric_coercion_when_datetime():
     df = DataFrame(
         {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]}
     )
-    expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0])
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0])
     df.Date = pd.to_datetime(df.Date)
-    result = df.groupby(["Number"]).apply(lambda x: x.iloc[0])
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["Number"]).apply(lambda x: x.iloc[0])
     tm.assert_series_equal(result["Str"], expected["Str"])
 
     # GH 15421
@@ -628,7 +679,9 @@ def test_apply_numeric_coercion_when_datetime():
     def get_B(g):
         return g.iloc[0][["B"]]
 
-    result = df.groupby("A").apply(get_B)["B"]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(get_B)["B"]
     expected = df.B
     expected.index = df.A
     tm.assert_series_equal(result, expected)
@@ -653,8 +706,11 @@ def predictions(tool):
     )
     df2 = df1.copy()
     df2.oTime = pd.to_datetime(df2.oTime)
-    expected = df1.groupby("Key").apply(predictions).p1
-    result = df2.groupby("Key").apply(predictions).p1
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df1.groupby("Key").apply(predictions).p1
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df2.groupby("Key").apply(predictions).p1
     tm.assert_series_equal(expected, result)
 
 
@@ -669,11 +725,13 @@ def test_apply_aggregating_timedelta_and_datetime():
         }
     )
     df["time_delta_zero"] = df.datetime - df.datetime
-    result = df.groupby("clientid").apply(
-        lambda ddf: Series(
-            {"clientid_age": ddf.time_delta_zero.min(), "date": ddf.datetime.min()}
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("clientid").apply(
+            lambda ddf: Series(
+                {"clientid_age": ddf.time_delta_zero.min(), "date": ddf.datetime.min()}
+            )
         )
-    )
     expected = DataFrame(
         {
             "clientid": ["A", "B", "C"],
@@ -716,11 +774,15 @@ def func_with_no_date(batch):
     def func_with_date(batch):
         return Series({"b": datetime(2015, 1, 1), "c": 2})
 
-    dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date)
     dfg_no_conversion_expected = DataFrame({"c": 2}, index=[1])
     dfg_no_conversion_expected.index.name = "a"
 
-    dfg_conversion = df.groupby(by=["a"]).apply(func_with_date)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        dfg_conversion = df.groupby(by=["a"]).apply(func_with_date)
     dfg_conversion_expected = DataFrame(
         {"b": pd.Timestamp(2015, 1, 1).as_unit("ns"), "c": 2}, index=[1]
     )
@@ -764,7 +826,9 @@ def test_groupby_apply_all_none():
     def test_func(x):
         pass
 
-    result = test_df.groupby("groups").apply(test_func)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = test_df.groupby("groups").apply(test_func)
     expected = DataFrame()
     tm.assert_frame_equal(result, expected)
 
@@ -779,8 +843,11 @@ def test_func(x):
             return None
         return x.iloc[[0, -1]]
 
-    result1 = test_df1.groupby("groups").apply(test_func)
-    result2 = test_df2.groupby("groups").apply(test_func)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result1 = test_df1.groupby("groups").apply(test_func)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result2 = test_df2.groupby("groups").apply(test_func)
     index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], names=["groups", None])
     index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], names=["groups", None])
     expected1 = DataFrame({"groups": [1, 1], "vars": [0, 2]}, index=index1)
@@ -793,7 +860,9 @@ def test_groupby_apply_return_empty_chunk():
     # GH 22221: apply filter which returns some empty groups
     df = DataFrame({"value": [0, 1], "group": ["filled", "empty"]})
     groups = df.groupby("group")
-    result = groups.apply(lambda group: group[group.value != 1]["value"])
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = groups.apply(lambda group: group[group.value != 1]["value"])
     expected = Series(
         [0],
         name="value",
@@ -820,7 +889,9 @@ def test_apply_with_mixed_types():
 def test_func_returns_object():
     # GH 28652
     df = DataFrame({"a": [1, 2]}, index=Index([1, 2]))
-    result = df.groupby("a").apply(lambda g: g.index)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("a").apply(lambda g: g.index)
     expected = Series([Index([1]), Index([2])], index=Index([1, 2], name="a"))
 
     tm.assert_series_equal(result, expected)
@@ -837,7 +908,9 @@ def test_apply_datetime_issue(group_column_dtlike):
     #   standard int values in range(len(num_columns))
 
     df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]})
-    result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
 
     expected = DataFrame(
         ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42]
@@ -876,7 +949,9 @@ def test_apply_series_return_dataframe_groups():
     def most_common_values(df):
         return Series({c: s.value_counts().index[0] for c, s in df.items()})
 
-    result = tdf.groupby("day").apply(most_common_values)["userId"]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = tdf.groupby("day").apply(most_common_values)["userId"]
     expected = Series(
         ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId"
     )
@@ -917,7 +992,9 @@ def test_groupby_apply_datetime_result_dtypes():
         ],
         columns=["observation", "color", "mood", "intensity", "score"],
     )
-    result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
     expected = Series(
         [np.dtype("datetime64[ns]"), object, object, np.int64, object],
         index=["observation", "color", "mood", "intensity", "score"],
@@ -937,7 +1014,9 @@ def test_groupby_apply_datetime_result_dtypes():
 def test_apply_index_has_complex_internals(index):
     # GH 31248
     df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
-    result = df.groupby("group", group_keys=False).apply(lambda x: x)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("group", group_keys=False).apply(lambda x: x)
     tm.assert_frame_equal(result, df)
 
 
@@ -960,7 +1039,9 @@ def test_apply_index_has_complex_internals(index):
 def test_apply_function_returns_non_pandas_non_scalar(function, expected_values):
     # GH 31441
     df = DataFrame(["A", "A", "B", "B"], columns=["groups"])
-    result = df.groupby("groups").apply(function)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("groups").apply(function)
     expected = Series(expected_values, index=Index(["A", "B"], name="groups"))
     tm.assert_series_equal(result, expected)
 
@@ -972,7 +1053,9 @@ def fct(group):
 
     df = DataFrame({"A": ["a", "a", "b", "none"], "B": [1, 2, 3, np.nan]})
 
-    result = df.groupby("A").apply(fct)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(fct)
     expected = Series(
         [[1.0, 2.0], [3.0], [np.nan]], index=Index(["a", "b", "none"], name="A")
     )
@@ -983,7 +1066,9 @@ def fct(group):
 def test_apply_function_index_return(function):
     # GH: 22541
     df = DataFrame([1, 2, 2, 2, 1, 2, 3, 1, 3, 1], columns=["id"])
-    result = df.groupby("id").apply(function)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("id").apply(function)
     expected = Series(
         [Index([0, 4, 7, 9]), Index([1, 2, 3, 5]), Index([6, 8])],
         index=Index([1, 2, 3], name="id"),
@@ -1019,7 +1104,9 @@ def test_apply_result_type(group_keys, udf):
     # We'd like to control whether the group keys end up in the index
     # regardless of whether the UDF happens to be a transform.
     df = DataFrame({"A": ["a", "b"], "B": [1, 2]})
-    df_result = df.groupby("A", group_keys=group_keys).apply(udf)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df_result = df.groupby("A", group_keys=group_keys).apply(udf)
     series_result = df.B.groupby(df.A, group_keys=group_keys).apply(udf)
 
     if group_keys:
@@ -1034,8 +1121,11 @@ def test_result_order_group_keys_false():
     # GH 34998
     # apply result order should not depend on whether index is the same or just equal
     df = DataFrame({"A": [2, 1, 2], "B": [1, 2, 3]})
-    result = df.groupby("A", group_keys=False).apply(lambda x: x)
-    expected = df.groupby("A", group_keys=False).apply(lambda x: x.copy())
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A", group_keys=False).apply(lambda x: x)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby("A", group_keys=False).apply(lambda x: x.copy())
     tm.assert_frame_equal(result, expected)
 
 
@@ -1047,8 +1137,15 @@ def test_apply_with_timezones_aware():
     df1 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_no_tz})
     df2 = DataFrame({"x": list(range(2)) * 3, "y": range(6), "t": index_tz})
 
-    result1 = df1.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy())
-    result2 = df2.groupby("x", group_keys=False).apply(lambda df: df[["x", "y"]].copy())
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result1 = df1.groupby("x", group_keys=False).apply(
+            lambda df: df[["x", "y"]].copy()
+        )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result2 = df2.groupby("x", group_keys=False).apply(
+            lambda df: df[["x", "y"]].copy()
+        )
 
     tm.assert_frame_equal(result1, result2)
 
@@ -1103,7 +1200,9 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():
     )
 
     grp = df.groupby(["A", "B"])
-    result = grp.apply(lambda x: x.head(1))
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grp.apply(lambda x: x.head(1))
 
     expected = df.iloc[[0, 2, 3]]
     expected = expected.reset_index()
@@ -1151,7 +1250,9 @@ def test_apply_dropna_with_indexed_same(dropna):
         },
         index=list("xxyxz"),
     )
-    result = df.groupby("group", dropna=dropna, group_keys=False).apply(lambda x: x)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("group", dropna=dropna, group_keys=False).apply(lambda x: x)
     expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]]
     tm.assert_frame_equal(result, expected)
 
@@ -1176,7 +1277,9 @@ def test_apply_dropna_with_indexed_same(dropna):
 def test_apply_as_index_constant_lambda(as_index, expected):
     # GH 13217
     df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]})
-    result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1)
     tm.assert_equal(result, expected)
 
 
@@ -1186,7 +1289,9 @@ def test_sort_index_groups():
         {"A": [1, 2, 3, 4, 5], "B": [6, 7, 8, 9, 0], "C": [1, 1, 1, 2, 2]},
         index=range(5),
     )
-    result = df.groupby("C").apply(lambda x: x.A.sort_index())
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("C").apply(lambda x: x.A.sort_index())
     expected = Series(
         range(1, 6),
         index=MultiIndex.from_tuples(
@@ -1206,9 +1311,11 @@ def test_positional_slice_groups_datetimelike():
             "let": list("abcde"),
         }
     )
-    result = expected.groupby(
-        [expected.let, expected.date.dt.date], group_keys=False
-    ).apply(lambda x: x.iloc[0:])
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = expected.groupby(
+            [expected.let, expected.date.dt.date], group_keys=False
+        ).apply(lambda x: x.iloc[0:])
     tm.assert_frame_equal(result, expected)
 
 
@@ -1251,24 +1358,29 @@ def test_apply_na(dropna):
         {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]}
     )
     dfgrp = df.groupby("grp", dropna=dropna)
-    result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z"))
-    expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1))
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z"))
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1))
     tm.assert_frame_equal(result, expected)
 
 
 def test_apply_empty_string_nan_coerce_bug():
     # GH#24903
-    result = (
-        DataFrame(
-            {
-                "a": [1, 1, 2, 2],
-                "b": ["", "", "", ""],
-                "c": pd.to_datetime([1, 2, 3, 4], unit="s"),
-            }
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = (
+            DataFrame(
+                {
+                    "a": [1, 1, 2, 2],
+                    "b": ["", "", "", ""],
+                    "c": pd.to_datetime([1, 2, 3, 4], unit="s"),
+                }
+            )
+            .groupby(["a", "b"])
+            .apply(lambda df: df.iloc[-1])
         )
-        .groupby(["a", "b"])
-        .apply(lambda df: df.iloc[-1])
-    )
     expected = DataFrame(
         [[1, "", pd.to_datetime(2, unit="s")], [2, "", pd.to_datetime(4, unit="s")]],
         columns=["a", "b", "c"],
@@ -1293,9 +1405,11 @@ def test_apply_index_key_error_bug(index_values):
         },
         index=Index(["a2", "a3", "aa"], name="a"),
     )
-    result = result.groupby("a").apply(
-        lambda df: Series([df["b"].mean()], index=["b_mean"])
-    )
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = result.groupby("a").apply(
+            lambda df: Series([df["b"].mean()], index=["b_mean"])
+        )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1343,7 +1457,9 @@ def test_apply_index_key_error_bug(index_values):
 def test_apply_nonmonotonic_float_index(arg, idx):
     # GH 34455
     expected = DataFrame({"col": arg}, index=idx)
-    result = expected.groupby("col", group_keys=False).apply(lambda x: x)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = expected.groupby("col", group_keys=False).apply(lambda x: x)
     tm.assert_frame_equal(result, expected)
 
 
@@ -1390,33 +1506,16 @@ def test_empty_df(method, op):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "group_col",
-    [([0.0, np.nan, 0.0, 0.0]), ([np.nan, 0.0, 0.0, 0.0]), ([0, 0.0, 0.0, np.nan])],
-)
-def test_apply_inconsistent_output(group_col):
-    # GH 34478
-    df = DataFrame({"group_col": group_col, "value_col": [2, 2, 2, 2]})
-
-    result = df.groupby("group_col").value_col.apply(
-        lambda x: x.value_counts().reindex(index=[1, 2, 3])
-    )
-    expected = Series(
-        [np.nan, 3.0, np.nan],
-        name="value_col",
-        index=MultiIndex.from_product([[0.0], [1, 2, 3]], names=["group_col", 0.0]),
-    )
-
-    tm.assert_series_equal(result, expected)
-
-
-def test_apply_array_output_multi_getitem():
-    # GH 18930
-    df = DataFrame(
-        {"A": {"a": 1, "b": 2}, "B": {"a": 1, "b": 2}, "C": {"a": 1, "b": 2}}
-    )
-    result = df.groupby("A")[["B", "C"]].apply(lambda x: np.array([0]))
-    expected = Series(
-        [np.array([0])] * 2, index=Index([1, 2], name="A"), name=("B", "C")
-    )
-    tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize("include_groups", [True, False])
+def test_include_groups(include_groups):
+    # GH#7155: include_groups=False -> result omits column "a" and no warning is raised
+    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})
+    gb = df.groupby("a")
+    warn = FutureWarning if include_groups else None
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = gb.apply(lambda x: x.sum(), include_groups=include_groups)
+    expected = DataFrame({"a": [2, 2], "b": [7, 5]}, index=Index([1, 2], name="a"))
+    if not include_groups:
+        expected = expected[["b"]]
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py
index 9bc07b584e9d1..09d5e06bf6ddd 100644
--- a/pandas/tests/groupby/test_apply_mutate.py
+++ b/pandas/tests/groupby/test_apply_mutate.py
@@ -13,10 +13,16 @@ def test_group_by_copy():
         }
     ).set_index("name")
 
-    grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group)
-    grp_by_copy = df.groupby(["age"], group_keys=False).apply(
-        lambda group: group.copy()
-    )
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grp_by_same_value = df.groupby(["age"], group_keys=False).apply(
+            lambda group: group
+        )
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grp_by_copy = df.groupby(["age"], group_keys=False).apply(
+            lambda group: group.copy()
+        )
     tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
 
 
@@ -47,8 +53,11 @@ def f_no_copy(x):
         x["rank"] = x.val.rank(method="min")
         return x.groupby("cat2")["rank"].min()
 
-    grpby_copy = df.groupby("cat1").apply(f_copy)
-    grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grpby_copy = df.groupby("cat1").apply(f_copy)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
     tm.assert_series_equal(grpby_copy, grpby_no_copy)
 
 
@@ -58,8 +67,11 @@ def test_no_mutate_but_looks_like():
     # second does not, but should yield the same results
     df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
 
-    result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key)
-    result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key)
     tm.assert_series_equal(result1, result2)
 
 
@@ -73,7 +85,9 @@ def fn(x):
         x.loc[x.index[-1], "col2"] = 0
         return x.col2
 
-    result = df.groupby(["col1"], as_index=False).apply(fn)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["col1"], as_index=False).apply(fn)
     expected = pd.Series(
         [1, 2, 0, 4, 5, 0],
         index=pd.MultiIndex.from_tuples(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index f2d21c10f7a15..b11240c841420 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -124,7 +124,9 @@ def test_basic():  # TODO: split this test
     def f(x):
         return x.drop_duplicates("person_name").iloc[0]
 
-    result = g.apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
     expected["person_name"] = expected["person_name"].astype("object")
@@ -329,7 +331,9 @@ def test_apply(ordered):
     # but for transform we should still get back the original index
     idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"])
     expected = Series(1, index=idx)
-    result = grouped.apply(lambda x: 1)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(lambda x: 1)
     tm.assert_series_equal(result, expected)
 
 
@@ -2013,7 +2017,10 @@ def test_category_order_apply(as_index, sort, observed, method, index_kind, orde
         df["a2"] = df["a"]
         df = df.set_index(keys)
     gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
-    op_result = getattr(gb, method)(lambda x: x.sum(numeric_only=True))
+    warn = FutureWarning if method == "apply" and index_kind == "range" else None
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(warn, match=msg):
+        op_result = getattr(gb, method)(lambda x: x.sum(numeric_only=True))
     if (method == "transform" or not as_index) and index_kind == "range":
         result = op_result["a"].cat.categories
     else:
diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index 25a4fd2550df6..16d7fe61b90ad 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -289,7 +289,9 @@ def test_count():
 
     for key in ["1st", "2nd", ["1st", "2nd"]]:
         left = df.groupby(key).count()
-        right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
         tm.assert_frame_equal(left, right)
 
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 0abf6428730ff..287310a18c7df 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -95,10 +95,12 @@ def test_builtins_apply(keys, f):
     assert result.shape == (ngroups, 3), assert_msg
 
     npfunc = lambda x: getattr(np, fname)(x, axis=0)  # numpy's equivalent function
-    expected = gb.apply(npfunc)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = gb.apply(npfunc)
     tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(None):
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         expected2 = gb.apply(lambda x: npfunc(x))
     tm.assert_frame_equal(result, expected2)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c0ac94c09e1ea..fdd959f0e8754 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -150,7 +150,9 @@ def test_groupby_nonobject_dtype(mframe, df_mixed_floats):
     def max_value(group):
         return group.loc[group["value"].idxmax()]
 
-    applied = df.groupby("A").apply(max_value)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        applied = df.groupby("A").apply(max_value)
     result = applied.dtypes
     expected = df.dtypes
     tm.assert_series_equal(result, expected)
@@ -171,7 +173,9 @@ def f_0(grp):
         return grp.iloc[0]
 
     expected = df.groupby("A").first()[["B"]]
-    result = df.groupby("A").apply(f_0)[["B"]]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_0)[["B"]]
     tm.assert_frame_equal(result, expected)
 
     def f_1(grp):
@@ -179,9 +183,10 @@ def f_1(grp):
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f_1)[["B"]]
-    # Cast to avoid upcast when setting nan below
-    e = expected.copy().astype("float64")
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_1)[["B"]]
+    e = expected.copy()
     e.loc["Tiger"] = np.nan
     tm.assert_frame_equal(result, e)
 
@@ -190,9 +195,10 @@ def f_2(grp):
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f_2)[["B"]]
-    # Explicit cast to float to avoid implicit cast when setting nan
-    e = expected.copy().astype({"B": "float"})
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_2)[["B"]]
+    e = expected.copy()
     e.loc["Pony"] = np.nan
     tm.assert_frame_equal(result, e)
 
@@ -202,7 +208,9 @@ def f_3(grp):
             return None
         return grp.iloc[0]
 
-    result = df.groupby("A").apply(f_3)[["C"]]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_3)[["C"]]
     e = df.groupby("A").first()[["C"]]
     e.loc["Pony"] = pd.NaT
     tm.assert_frame_equal(result, e)
@@ -213,7 +221,9 @@ def f_4(grp):
             return None
         return grp.iloc[0].loc["C"]
 
-    result = df.groupby("A").apply(f_4)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").apply(f_4)
     e = df.groupby("A").first()["C"].copy()
     e.loc["Pony"] = np.nan
     e.name = None
@@ -392,8 +402,11 @@ def f3(x):
     depr_msg = "The behavior of array concatenation with empty entries is deprecated"
 
     # correct result
-    result1 = df.groupby("a").apply(f1)
-    result2 = df2.groupby("a").apply(f1)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result1 = df.groupby("a").apply(f1)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result2 = df2.groupby("a").apply(f1)
     tm.assert_frame_equal(result1, result2)
 
     # should fail (not the same number of levels)
@@ -1322,11 +1335,15 @@ def summarize_random_name(df):
         # inconsistent.
         return Series({"count": 1, "mean": 2, "omissions": 3}, name=df.iloc[0]["A"])
 
-    metrics = df.groupby("A").apply(summarize)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        metrics = df.groupby("A").apply(summarize)
     assert metrics.columns.name is None
-    metrics = df.groupby("A").apply(summarize, "metrics")
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        metrics = df.groupby("A").apply(summarize, "metrics")
     assert metrics.columns.name == "metrics"
-    metrics = df.groupby("A").apply(summarize_random_name)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        metrics = df.groupby("A").apply(summarize_random_name)
     assert metrics.columns.name is None
 
 
@@ -1619,7 +1636,9 @@ def test_dont_clobber_name_column():
         {"key": ["a", "a", "a", "b", "b", "b"], "name": ["foo", "bar", "baz"] * 2}
     )
 
-    result = df.groupby("key", group_keys=False).apply(lambda x: x)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("key", group_keys=False).apply(lambda x: x)
     tm.assert_frame_equal(result, df)
 
 
@@ -1693,7 +1712,9 @@ def freducex(x):
     grouped = df.groupby(grouper, group_keys=False)
 
     # make sure all these work
-    grouped.apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grouped.apply(f)
     grouped.aggregate(freduce)
     grouped.aggregate({"C": freduce, "D": freduce})
     grouped.transform(f)
@@ -1714,7 +1735,9 @@ def f(group):
         names.append(group.name)
         return group.copy()
 
-    df.groupby("a", sort=False, group_keys=False).apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        df.groupby("a", sort=False, group_keys=False).apply(f)
 
     expected_names = [0, 1, 2]
     assert names == expected_names
@@ -1920,7 +1943,9 @@ def test_groupby_preserves_sort(sort_column, group_column):
     def test_sort(x):
         tm.assert_frame_equal(x, x.sort_values(by=sort_column))
 
-    g.apply(test_sort)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        g.apply(test_sort)
 
 
 def test_pivot_table_values_key_error():
@@ -1928,7 +1953,7 @@ def test_pivot_table_values_key_error():
     df = DataFrame(
         {
             "eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(),
-            "thename": range(0, 20),
+            "thename": range(20),
         }
     )
 
@@ -2102,7 +2127,9 @@ def test_empty_groupby_apply_nonunique_columns():
     df[3] = df[3].astype(np.int64)
     df.columns = [0, 1, 2, 0]
     gb = df.groupby(df[1], group_keys=False)
-    res = gb.apply(lambda x: x)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        res = gb.apply(lambda x: x)
     assert (res.dtypes == df.dtypes).all()
 
 
@@ -3187,3 +3214,34 @@ def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn):
     else:
         expected = DataFrame({"b": [3, 4]}, index=Index([1, 1], name="a"))
     tm.assert_equal(result, expected)
+
+
+def test_groupby_ngroup_with_nan():
+    # GH#50100: NaN categorical key with dropna=False is still numbered as group 0
+    df = DataFrame({"a": Categorical([np.nan]), "b": [1]})
+    result = df.groupby(["a", "b"], dropna=False, observed=False).ngroup()
+    expected = Series([0])
+    tm.assert_series_equal(result, expected)
+
+
+def test_get_group_axis_1():
+    # GH#54858: get_group(1) on an axis=1 groupby selects the columns labelled 1
+    df = DataFrame(
+        {
+            "col1": [0, 3, 2, 3],
+            "col2": [4, 1, 6, 7],
+            "col3": [3, 8, 2, 10],
+            "col4": [1, 13, 6, 15],
+            "col5": [-4, 5, 6, -7],
+        }
+    )
+    with tm.assert_produces_warning(FutureWarning, match="deprecated"):
+        grouped = df.groupby(axis=1, by=[1, 2, 3, 2, 1])
+    result = grouped.get_group(1)
+    expected = DataFrame(
+        {
+            "col1": [0, 3, 2, 3],
+            "col5": [-4, 5, 6, -7],
+        }
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 099e7bc3890d0..d82278c277d48 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -324,7 +324,9 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data,
 
     df = pd.DataFrame(data)
     gb = df.groupby("groups", dropna=dropna)
-    result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
 
     mi_tuples = tuple(zip(data["groups"], selected_data["values"]))
     mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None])
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
index 773c1e60e97af..601e67bbca5e3 100644
--- a/pandas/tests/groupby/test_groupby_subclass.py
+++ b/pandas/tests/groupby/test_groupby_subclass.py
@@ -63,7 +63,9 @@ def func(group):
         assert hasattr(group, "testattr")
         return group.testattr
 
-    result = custom_df.groupby("c").apply(func)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = custom_df.groupby("c").apply(func)
     expected = tm.SubclassedSeries(["hello"] * 3, index=Index([7, 8, 9], name="c"))
     tm.assert_series_equal(result, expected)
 
@@ -101,5 +103,7 @@ def test_groupby_resample_preserves_subclass(obj):
     df = df.set_index("Date")
 
     # Confirm groupby.resample() preserves dataframe type
-    result = df.groupby("Buyer").resample("5D").sum()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("Buyer").resample("5D").sum()
     assert isinstance(result, obj)
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index e0793ada679c2..d05b60fd56b5f 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -224,7 +224,9 @@ def test_grouper_creation_bug(self):
         result = g.sum()
         tm.assert_frame_equal(result, expected)
 
-        result = g.apply(lambda x: x.sum())
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = g.apply(lambda x: x.sum())
         expected["A"] = [0, 2, 4]
         expected = expected.loc[:, ["A", "B"]]
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index c9fe011f7063b..1a26559ef4447 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -470,8 +470,12 @@ def test_timegrouper_apply_return_type_series(self):
         def sumfunc_series(x):
             return Series([x["value"].sum()], ("sum",))
 
-        expected = df.groupby(Grouper(key="date")).apply(sumfunc_series)
-        result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_series)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df.groupby(Grouper(key="date")).apply(sumfunc_series)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_series)
         tm.assert_frame_equal(
             result.reset_index(drop=True), expected.reset_index(drop=True)
         )
@@ -487,8 +491,11 @@ def test_timegrouper_apply_return_type_value(self):
         def sumfunc_value(x):
             return x.value.sum()
 
-        expected = df.groupby(Grouper(key="date")).apply(sumfunc_value)
-        result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df.groupby(Grouper(key="date")).apply(sumfunc_value)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value)
         tm.assert_series_equal(
             result.reset_index(drop=True), expected.reset_index(drop=True)
         )
@@ -842,7 +849,7 @@ def test_grouper_period_index(self):
         result = period_series.groupby(period_series.index.month).sum()
 
         expected = Series(
-            range(0, periods), index=Index(range(1, periods + 1), name=index.name)
+            range(periods), index=Index(range(1, periods + 1), name=index.name)
         )
         tm.assert_series_equal(result, expected)
 
@@ -895,7 +902,9 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
         assert gb._selected_obj._get_axis(gb.axis).nlevels == 1
 
         # function that returns a Series
-        res = gb.apply(lambda x: x["Quantity"] * 2)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = gb.apply(lambda x: x["Quantity"] * 2)
 
         expected = DataFrame(
             [[36, 6, 6, 10, 2]],
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 7c50124e57e29..944dda8977882 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -327,9 +327,12 @@ def test_against_frame_and_seriesgroupby(
     )
     if frame:
         # compare against apply with DataFrame value_counts
-        expected = gp.apply(
-            _frame_value_counts, ["gender", "education"], normalize, sort, ascending
-        )
+        warn = FutureWarning if groupby == "column" else None
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(warn, match=msg):
+            expected = gp.apply(
+                _frame_value_counts, ["gender", "education"], normalize, sort, ascending
+            )
 
         if as_index:
             tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 062dfe3931423..acb4b93ba1af3 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -636,7 +636,9 @@ def f(group):
         return group[:1]
 
     grouped = df.groupby("c")
-    result = grouped.apply(f)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = grouped.apply(f)
 
     assert result["d"].dtype == np.float64
 
@@ -790,7 +792,13 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
         f = gb[["float", "float_missing"]].apply(targop)
         expected = concat([f, i], axis=1)
     else:
-        expected = gb.apply(targop)
+        if op != "shift" or not isinstance(gb_target.get("by"), (str, list)):
+            warn = None
+        else:
+            warn = FutureWarning
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(warn, match=msg):
+            expected = gb.apply(targop)
 
     expected = expected.sort_index(axis=1)
     if op == "shift":
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index 64cbe657a8aff..87facbf529411 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -228,6 +228,13 @@ def test_isin(self):
         expected = np.array([False] * 5 + [True])
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_isin_overlapping_intervals(self):
+        # GH 34974: isin on a CategoricalIndex built from overlapping intervals
+        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
+        result = CategoricalIndex(idx).isin(idx)
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
+
     def test_identical(self):
         ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
         ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 2e7b38abf4212..b56bad7f2e833 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -343,9 +343,11 @@ def test_difference_freq(self, sort):
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
+        # preserve frequency when the difference is a contiguous
+        # subset of the original range
         other = date_range("20160922", "20160925", freq="D")
         idx_diff = index.difference(other, sort)
-        expected = DatetimeIndex(["20160920", "20160921"], freq=None)
+        expected = DatetimeIndex(["20160920", "20160921"], freq="D")
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py
index 47efc43d5eae0..66163dad3deae 100644
--- a/pandas/tests/indexes/multi/test_partial_indexing.py
+++ b/pandas/tests/indexes/multi/test_partial_indexing.py
@@ -31,7 +31,7 @@ def df():
     dr = date_range("2016-01-01", "2016-01-03", freq="12H")
     abc = ["a", "b", "c"]
     mi = MultiIndex.from_product([dr, abc])
-    frame = DataFrame({"c1": range(0, 15)}, index=mi)
+    frame = DataFrame({"c1": range(15)}, index=mi)
     return frame
 
 
diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py
index da9838d4a2ed3..06dbb33aadf97 100644
--- a/pandas/tests/indexes/multi/test_reshape.py
+++ b/pandas/tests/indexes/multi/test_reshape.py
@@ -169,6 +169,28 @@ def test_append_names_dont_match():
     tm.assert_index_equal(result, expected)
 
 
+def test_append_overlapping_interval_levels():
+    # GH 54934: appending MultiIndexes whose interval levels overlap keeps all tuples
+    ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0])
+    ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5])
+    mi1 = MultiIndex.from_product([ivl1, ivl1])
+    mi2 = MultiIndex.from_product([ivl2, ivl2])
+    result = mi1.append(mi2)
+    expected = MultiIndex.from_tuples(
+        [
+            (pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)),
+            (pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)),
+            (pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)),
+            (pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)),
+            (pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)),
+            (pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)),
+            (pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)),
+            (pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)),
+        ]
+    )
+    tm.assert_index_equal(result, expected)
+
+
 def test_repeat():
     reps = 2
     numbers = [1, 2, 3]
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 5f137df281fa3..132704434829e 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -10,9 +10,6 @@
 )
 import pandas._testing as tm
 
-# aliases to make some tests easier to read
-RI = RangeIndex
-
 
 class TestRangeIndex:
     @pytest.fixture
@@ -507,25 +504,31 @@ def test_len_specialised(self, step):
     @pytest.mark.parametrize(
         "indices, expected",
         [
-            ([RI(1, 12, 5)], RI(1, 12, 5)),
-            ([RI(0, 6, 4)], RI(0, 6, 4)),
-            ([RI(1, 3), RI(3, 7)], RI(1, 7)),
-            ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
-            ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
-            ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
-            ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
-            ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
-            ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
-            ([RI(-4, -2), RI(3, 5)], Index([-4, -3, 3, 4])),
-            ([RI(-2), RI(3, 5)], RI(3, 5)),
-            ([RI(2), RI(2)], Index([0, 1, 0, 1])),
-            ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
-            ([RI(2), RI(3, 5), RI(5, 8, 4)], Index([0, 1, 3, 4, 5])),
-            ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
-            ([RI(3), Index([-1, 3, 15])], Index([0, 1, 2, -1, 3, 15])),
-            ([RI(3), Index([-1, 3.1, 15.0])], Index([0, 1, 2, -1, 3.1, 15.0])),
-            ([RI(3), Index(["a", None, 14])], Index([0, 1, 2, "a", None, 14])),
-            ([RI(3, 1), Index(["a", None, 14])], Index(["a", None, 14])),
+            ([RangeIndex(1, 12, 5)], RangeIndex(1, 12, 5)),
+            ([RangeIndex(0, 6, 4)], RangeIndex(0, 6, 4)),
+            ([RangeIndex(1, 3), RangeIndex(3, 7)], RangeIndex(1, 7)),
+            ([RangeIndex(1, 5, 2), RangeIndex(5, 6)], RangeIndex(1, 6, 2)),
+            ([RangeIndex(1, 3, 2), RangeIndex(4, 7, 3)], RangeIndex(1, 7, 3)),
+            ([RangeIndex(-4, 3, 2), RangeIndex(4, 7, 2)], RangeIndex(-4, 7, 2)),
+            ([RangeIndex(-4, -8), RangeIndex(-8, -12)], RangeIndex(0, 0)),
+            ([RangeIndex(-4, -8), RangeIndex(3, -4)], RangeIndex(0, 0)),
+            ([RangeIndex(-4, -8), RangeIndex(3, 5)], RangeIndex(3, 5)),
+            ([RangeIndex(-4, -2), RangeIndex(3, 5)], Index([-4, -3, 3, 4])),
+            ([RangeIndex(-2), RangeIndex(3, 5)], RangeIndex(3, 5)),
+            ([RangeIndex(2), RangeIndex(2)], Index([0, 1, 0, 1])),
+            ([RangeIndex(2), RangeIndex(2, 5), RangeIndex(5, 8, 4)], RangeIndex(0, 6)),
+            (
+                [RangeIndex(2), RangeIndex(3, 5), RangeIndex(5, 8, 4)],
+                Index([0, 1, 3, 4, 5]),
+            ),
+            (
+                [RangeIndex(-2, 2), RangeIndex(2, 5), RangeIndex(5, 8, 4)],
+                RangeIndex(-2, 6),
+            ),
+            ([RangeIndex(3), Index([-1, 3, 15])], Index([0, 1, 2, -1, 3, 15])),
+            ([RangeIndex(3), Index([-1, 3.1, 15.0])], Index([0, 1, 2, -1, 3.1, 15.0])),
+            ([RangeIndex(3), Index(["a", None, 14])], Index([0, 1, 2, "a", None, 14])),
+            ([RangeIndex(3, 1), Index(["a", None, 14])], Index(["a", None, 14])),
         ],
     )
     def test_append(self, indices, expected):
@@ -567,7 +570,7 @@ def test_format_empty(self):
         assert empty_idx.format(name=True) == [""]
 
     @pytest.mark.parametrize(
-        "RI",
+        "ri",
         [
             RangeIndex(0, -1, -1),
             RangeIndex(0, 1, 1),
@@ -576,10 +579,10 @@ def test_format_empty(self):
             RangeIndex(-3, -5, -2),
         ],
     )
-    def test_append_len_one(self, RI):
+    def test_append_len_one(self, ri):
         # GH39401
-        result = RI.append([])
-        tm.assert_index_equal(result, RI, exact=True)
+        result = ri.append([])
+        tm.assert_index_equal(result, ri, exact=True)
 
     @pytest.mark.parametrize("base", [RangeIndex(0, 2), Index([0, 1])])
     def test_isin_range(self, base):
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
index cb6dce1e7ad80..6cdd6944e90ea 100644
--- a/pandas/tests/indexes/timedeltas/test_setops.py
+++ b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -219,9 +219,11 @@ def test_difference_freq(self, sort):
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
+        # preserve frequency when the difference is a contiguous
+        # subset of the original range
         other = timedelta_range("2 days", "5 days", freq="D")
         idx_diff = index.difference(other, sort)
-        expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
+        expected = TimedeltaIndex(["0 days", "1 days"], freq="D")
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 9d11827e2923e..b86e233110e88 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -148,7 +148,7 @@ def test_frame_getitem_simple_key_error(
 def test_tuple_string_column_names():
     # GH#50372
     mi = MultiIndex.from_tuples([("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")])
-    df = DataFrame([range(0, 4), range(1, 5), range(2, 6)], columns=mi)
+    df = DataFrame([range(4), range(1, 5), range(2, 6)], columns=mi)
     df["single_index"] = 0
 
     df_flat = df.copy()
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index b45d197af332e..d3a6d4bf7cebf 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -16,7 +16,6 @@
     Timestamp,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
 
 
 @pytest.fixture
@@ -25,7 +24,9 @@ def df():
         {
             "A": np.arange(6, dtype="int64"),
         },
-        index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cab")), name="B"),
+        index=CategoricalIndex(
+            list("aabbca"), dtype=CategoricalDtype(list("cab")), name="B"
+        ),
     )
 
 
@@ -35,13 +36,15 @@ def df2():
         {
             "A": np.arange(6, dtype="int64"),
         },
-        index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
+        index=CategoricalIndex(
+            list("aabbca"), dtype=CategoricalDtype(list("cabe")), name="B"
+        ),
     )
 
 
 class TestCategoricalIndex:
     def test_loc_scalar(self, df):
-        dtype = CDT(list("cab"))
+        dtype = CategoricalDtype(list("cab"))
         result = df.loc["a"]
         bidx = Series(list("aaa"), name="B").astype(dtype)
         assert bidx.dtype == dtype
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index f36fdf0d36ea9..7353b5ef76ba3 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -1,4 +1,4 @@
-from string import ascii_letters as letters
+from string import ascii_letters
 
 import numpy as np
 import pytest
@@ -24,9 +24,9 @@
 
 def random_text(nobs=100):
     # Construct a DataFrame where each row is a random slice from 'letters'
-    idxs = np.random.default_rng(2).integers(len(letters), size=(nobs, 2))
+    idxs = np.random.default_rng(2).integers(len(ascii_letters), size=(nobs, 2))
     idxs.sort(axis=1)
-    strings = [letters[x[0] : x[1]] for x in idxs]
+    strings = [ascii_letters[x[0] : x[1]] for x in idxs]
 
     return DataFrame(strings, columns=["letters"])
 
diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py
index 21d31ec8a7fb5..ecee58362f8a9 100644
--- a/pandas/tests/io/excel/test_odswriter.py
+++ b/pandas/tests/io/excel/test_odswriter.py
@@ -1,7 +1,12 @@
+from datetime import (
+    date,
+    datetime,
+)
 import re
 
 import pytest
 
+import pandas as pd
 import pandas._testing as tm
 
 from pandas.io.excel import ExcelWriter
@@ -47,3 +52,47 @@ def test_book_and_sheets_consistent(ext):
             table = odf.table.Table(name="test_name")
             writer.book.spreadsheet.addElement(table)
             assert writer.sheets == {"test_name": table}
+
+
+@pytest.mark.parametrize(
+    ["value", "cell_value_type", "cell_value_attribute", "cell_value"],
+    argvalues=[
+        (True, "boolean", "boolean-value", "true"),
+        ("test string", "string", "string-value", "test string"),
+        (1, "float", "value", "1"),
+        (1.5, "float", "value", "1.5"),
+        (
+            datetime(2010, 10, 10, 10, 10, 10),
+            "date",
+            "date-value",
+            "2010-10-10T10:10:10",
+        ),
+        (date(2010, 10, 10), "date", "date-value", "2010-10-10"),
+    ],
+)
+def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value):
+    # GH#54994 ODS: each written cell's value-type and value attribute follow the spec
+    # http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
+    from odf.namespaces import OFFICENS
+    from odf.table import (
+        TableCell,
+        TableRow,
+    )
+
+    table_cell_name = TableCell().qname
+
+    with tm.ensure_clean(ext) as f:
+        pd.DataFrame([[value]]).to_excel(f, header=False, index=False)
+
+        with pd.ExcelFile(f) as wb:
+            sheet = wb._reader.get_sheet_by_index(0)
+            sheet_rows = sheet.getElementsByType(TableRow)
+            sheet_cells = [
+                x
+                for x in sheet_rows[0].childNodes
+                if hasattr(x, "qname") and x.qname == table_cell_name
+            ]
+
+            cell = sheet_cells[0]
+            assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
+            assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 6db70c894f692..8dd9f96a05a90 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -54,6 +54,7 @@
     ),
     pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")),
     pytest.param("odf", marks=td.skip_if_no("odf")),
+    pytest.param("calamine", marks=td.skip_if_no("python_calamine")),
 ]
 
 
@@ -67,11 +68,11 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
         return False
     if engine == "odf" and read_ext != ".ods":
         return False
-    if read_ext == ".ods" and engine != "odf":
+    if read_ext == ".ods" and engine not in {"odf", "calamine"}:
         return False
     if engine == "pyxlsb" and read_ext != ".xlsb":
         return False
-    if read_ext == ".xlsb" and engine != "pyxlsb":
+    if read_ext == ".xlsb" and engine not in {"pyxlsb", "calamine"}:
         return False
     if engine == "xlrd" and read_ext != ".xls":
         return False
@@ -160,9 +161,9 @@ def test_engine_kwargs(self, read_ext, engine):
             "ods": {"foo": "abcd"},
         }
 
-        if read_ext[1:] in {"xls", "xlsb"}:
+        if engine in {"xlrd", "pyxlsb"}:
             msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'")
-        elif read_ext[1:] == "ods":
+        elif engine == "odf":
             msg = re.escape(r"load() got an unexpected keyword argument 'foo'")
         else:
             msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'")
@@ -194,8 +195,8 @@ def test_usecols_int(self, read_ext):
                 usecols=3,
             )
 
-    def test_usecols_list(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_list(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -218,8 +219,8 @@ def test_usecols_list(self, request, read_ext, df_ref):
         tm.assert_frame_equal(df1, df_ref, check_names=False)
         tm.assert_frame_equal(df2, df_ref, check_names=False)
 
-    def test_usecols_str(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_str(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -275,9 +276,9 @@ def test_usecols_str(self, request, read_ext, df_ref):
         "usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]]
     )
     def test_usecols_diff_positional_int_columns_order(
-        self, request, read_ext, usecols, df_ref
+        self, request, engine, read_ext, usecols, df_ref
     ):
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -298,8 +299,8 @@ def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_r
         result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols=usecols)
         tm.assert_frame_equal(result, expected, check_names=False)
 
-    def test_read_excel_without_slicing(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -310,8 +311,8 @@ def test_read_excel_without_slicing(self, request, read_ext, df_ref):
         result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
         tm.assert_frame_equal(result, expected, check_names=False)
 
-    def test_usecols_excel_range_str(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -398,20 +399,26 @@ def test_excel_stop_iterator(self, read_ext):
         expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_cell_error_na(self, request, read_ext):
-        if read_ext == ".xlsb":
+    def test_excel_cell_error_na(self, request, engine, read_ext):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
             )
 
+        # https://github.com/tafia/calamine/issues/355
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine can't extract error from ods files")
+            )
+
         parsed = pd.read_excel("test3" + read_ext, sheet_name="Sheet1")
         expected = DataFrame([[np.nan]], columns=["Test"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_table(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_excel_table(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -431,8 +438,8 @@ def test_excel_table(self, request, read_ext, df_ref):
         )
         tm.assert_frame_equal(df3, df1.iloc[:-1])
 
-    def test_reader_special_dtypes(self, request, read_ext):
-        if read_ext == ".xlsb":
+    def test_reader_special_dtypes(self, request, engine, read_ext):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -800,8 +807,8 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
         result = pd.read_excel("testdateoverflow" + read_ext)
         tm.assert_frame_equal(result, expected)
 
-    def test_sheet_name(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_sheet_name(self, request, read_ext, engine, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -869,6 +876,11 @@ def test_corrupt_bytes_raises(self, engine):
                 "Unsupported format, or corrupt file: Expected BOF "
                 "record; found b'foo'"
             )
+        elif engine == "calamine":
+            from python_calamine import CalamineError
+
+            error = CalamineError
+            msg = "Cannot detect file format"
         else:
             error = BadZipFile
             msg = "File is not a zip file"
@@ -969,6 +981,14 @@ def test_reader_seconds(self, request, engine, read_ext):
                 )
             )
 
+        # GH 55045
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="ODS file contains bad datetime (seconds as text)"
+                )
+            )
+
         # Test reading times with and without milliseconds. GH5945.
         expected = DataFrame.from_dict(
             {
@@ -994,15 +1014,21 @@ def test_reader_seconds(self, request, engine, read_ext):
         actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
-    def test_read_excel_multiindex(self, request, read_ext):
+    def test_read_excel_multiindex(self, request, engine, read_ext):
         # see gh-4679
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
             )
 
+        # https://github.com/tafia/calamine/issues/354
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Last test fails in calamine")
+            )
+
         mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
         mi_file = "testmultiindex" + read_ext
 
@@ -1088,10 +1114,10 @@ def test_read_excel_multiindex(self, request, read_ext):
         ],
     )
     def test_read_excel_multiindex_blank_after_name(
-        self, request, read_ext, sheet_name, idx_lvl2
+        self, request, engine, read_ext, sheet_name, idx_lvl2
     ):
         # GH34673
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb (GH4679"
@@ -1212,9 +1238,9 @@ def test_read_excel_bool_header_arg(self, read_ext):
             with pytest.raises(TypeError, match=msg):
                 pd.read_excel("test1" + read_ext, header=arg)
 
-    def test_read_excel_skiprows(self, request, read_ext):
+    def test_read_excel_skiprows(self, request, engine, read_ext):
         # GH 4903
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1267,9 +1293,9 @@ def test_read_excel_skiprows(self, request, read_ext):
         )
         tm.assert_frame_equal(actual, expected)
 
-    def test_read_excel_skiprows_callable_not_in(self, request, read_ext):
+    def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
         # GH 4903
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1397,7 +1423,7 @@ def test_trailing_blanks(self, read_ext):
 
     def test_ignore_chartsheets_by_str(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1410,7 +1436,7 @@ def test_ignore_chartsheets_by_str(self, request, engine, read_ext):
 
     def test_ignore_chartsheets_by_int(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1540,8 +1566,8 @@ def test_excel_passes_na_filter(self, read_ext, na_filter):
         expected = DataFrame(expected, columns=["Test"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_table_sheet_by_index(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1569,8 +1595,8 @@ def test_excel_table_sheet_by_index(self, request, read_ext, df_ref):
 
         tm.assert_frame_equal(df3, df1.iloc[:-1])
 
-    def test_sheet_name(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_sheet_name(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1639,7 +1665,7 @@ def test_excel_read_binary(self, engine, read_ext):
     def test_excel_read_binary_via_read_excel(self, read_ext, engine):
         # GH 38424
         with open("test1" + read_ext, "rb") as f:
-            result = pd.read_excel(f)
+            result = pd.read_excel(f, engine=engine)
         expected = pd.read_excel("test1" + read_ext, engine=engine)
         tm.assert_frame_equal(result, expected)
 
@@ -1691,7 +1717,7 @@ def test_engine_invalid_option(self, read_ext):
 
     def test_ignore_chartsheets(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1711,6 +1737,10 @@ def test_corrupt_files_closed(self, engine, read_ext):
             import xlrd
 
             errors = (BadZipFile, xlrd.biffh.XLRDError)
+        elif engine == "calamine":
+            from python_calamine import CalamineError
+
+            errors = (CalamineError,)
 
         with tm.ensure_clean(f"corrupt{read_ext}") as file:
             Path(file).write_text("corrupt", encoding="utf-8")
diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py
index 73de2b068b699..6c3bf01cb1857 100644
--- a/pandas/tests/io/formats/test_info.py
+++ b/pandas/tests/io/formats/test_info.py
@@ -1,6 +1,6 @@
 from io import StringIO
 import re
-from string import ascii_uppercase as uppercase
+from string import ascii_uppercase
 import sys
 import textwrap
 
@@ -452,9 +452,9 @@ def memory_usage(f):
         return f.memory_usage(deep=True).sum()
 
     N = 100
-    M = len(uppercase)
+    M = len(ascii_uppercase)
     index = MultiIndex.from_product(
-        [list(uppercase), date_range("20160101", periods=N)],
+        [list(ascii_uppercase), date_range("20160101", periods=N)],
         names=["id", "date"],
     )
     df = DataFrame(
diff --git a/pandas/tests/io/formats/test_series_info.py b/pandas/tests/io/formats/test_series_info.py
index 02827ee25042a..29dd704f6efa9 100644
--- a/pandas/tests/io/formats/test_series_info.py
+++ b/pandas/tests/io/formats/test_series_info.py
@@ -1,5 +1,5 @@
 from io import StringIO
-from string import ascii_uppercase as uppercase
+from string import ascii_uppercase
 import textwrap
 
 import numpy as np
@@ -165,9 +165,9 @@ def test_info_memory_usage_bug_on_multiindex():
     # GH 14308
     # memory usage introspection should not materialize .values
     N = 100
-    M = len(uppercase)
+    M = len(ascii_uppercase)
     index = MultiIndex.from_product(
-        [list(uppercase), date_range("20160101", periods=N)],
+        [list(ascii_uppercase), date_range("20160101", periods=N)],
         names=["id", "date"],
     )
     s = Series(np.random.default_rng(2).standard_normal(N * M), index=index)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index ca3ce6ba34515..b3c2e67f7c318 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2044,7 +2044,7 @@ def test_read_json_dtype_backend(self, string_storage, dtype_backend, orient):
             )
 
         if orient == "values":
-            expected.columns = list(range(0, 8))
+            expected.columns = list(range(8))
 
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index 5bb7097770820..d5f8c5200c4a3 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -1033,7 +1033,7 @@ def test_decode_floating_point(self, sign, float_number):
     def test_encode_big_set(self):
         s = set()
 
-        for x in range(0, 100000):
+        for x in range(100000):
             s.add(x)
 
         # Make sure no Exception is raised.
diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py
index 492b4d5ec058e..0c5a2e0d04e5a 100644
--- a/pandas/tests/io/parser/common/test_read_errors.py
+++ b/pandas/tests/io/parser/common/test_read_errors.py
@@ -15,6 +15,7 @@
 from pandas.errors import (
     EmptyDataError,
     ParserError,
+    ParserWarning,
 )
 
 from pandas import DataFrame
@@ -129,18 +130,16 @@ def test_unexpected_keyword_parameter_exception(all_parsers):
         parser.read_table("foo.tsv", foo=1)
 
 
-def test_suppress_error_output(all_parsers, capsys):
+def test_suppress_error_output(all_parsers):
     # see gh-15925
     parser = all_parsers
     data = "a\n1\n1,2,3\n4\n5,6,7"
     expected = DataFrame({"a": [1, 4]})
 
-    result = parser.read_csv(StringIO(data), on_bad_lines="skip")
+    with tm.assert_produces_warning(None):
+        result = parser.read_csv(StringIO(data), on_bad_lines="skip")
     tm.assert_frame_equal(result, expected)
 
-    captured = capsys.readouterr()
-    assert captured.err == ""
-
 
 def test_error_bad_lines(all_parsers):
     # see gh-15925
@@ -152,19 +151,18 @@ def test_error_bad_lines(all_parsers):
         parser.read_csv(StringIO(data), on_bad_lines="error")
 
 
-def test_warn_bad_lines(all_parsers, capsys):
+def test_warn_bad_lines(all_parsers):
     # see gh-15925
     parser = all_parsers
     data = "a\n1\n1,2,3\n4\n5,6,7"
     expected = DataFrame({"a": [1, 4]})
 
-    result = parser.read_csv(StringIO(data), on_bad_lines="warn")
+    with tm.assert_produces_warning(
+        ParserWarning, match="Skipping line", check_stacklevel=False
+    ):
+        result = parser.read_csv(StringIO(data), on_bad_lines="warn")
     tm.assert_frame_equal(result, expected)
 
-    captured = capsys.readouterr()
-    assert "Skipping line 3" in captured.err
-    assert "Skipping line 5" in captured.err
-
 
 def test_read_csv_wrong_num_columns(all_parsers):
     # Too few columns.
@@ -245,7 +243,7 @@ def test_bad_header_uniform_error(all_parsers):
         parser.read_csv(StringIO(data), index_col=0, on_bad_lines="error")
 
 
-def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys):
+def test_on_bad_lines_warn_correct_formatting(all_parsers):
     # see gh-15925
     parser = all_parsers
     data = """1,2
@@ -256,17 +254,8 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys):
 """
     expected = DataFrame({"1": "a", "2": ["b"] * 2})
 
-    result = parser.read_csv(StringIO(data), on_bad_lines="warn")
+    with tm.assert_produces_warning(
+        ParserWarning, match="Skipping line", check_stacklevel=False
+    ):
+        result = parser.read_csv(StringIO(data), on_bad_lines="warn")
     tm.assert_frame_equal(result, expected)
-
-    captured = capsys.readouterr()
-    if parser.engine == "c":
-        warn = """Skipping line 3: expected 2 fields, saw 3
-Skipping line 4: expected 2 fields, saw 3
-
-"""
-    else:
-        warn = """Skipping line 3: Expected 2 fields in line 3, saw 3
-Skipping line 4: Expected 2 fields in line 4, saw 3
-"""
-    assert captured.err == warn
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 32a010b3aeb34..18eee01f87621 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -19,7 +19,10 @@
 
 from pandas.compat import is_ci_environment
 from pandas.compat.numpy import np_version_gte1p24
-from pandas.errors import ParserError
+from pandas.errors import (
+    ParserError,
+    ParserWarning,
+)
 import pandas.util._test_decorators as td
 
 from pandas import (
@@ -461,7 +464,7 @@ def test_data_after_quote(c_parser_only):
     tm.assert_frame_equal(result, expected)
 
 
-def test_comment_whitespace_delimited(c_parser_only, capsys):
+def test_comment_whitespace_delimited(c_parser_only):
     parser = c_parser_only
     test_input = """\
 1 2
@@ -474,18 +477,17 @@ def test_comment_whitespace_delimited(c_parser_only, capsys):
 8# 1 field, NaN
 9 2 3 # skipped line
 # comment"""
-    df = parser.read_csv(
-        StringIO(test_input),
-        comment="#",
-        header=None,
-        delimiter="\\s+",
-        skiprows=0,
-        on_bad_lines="warn",
-    )
-    captured = capsys.readouterr()
-    # skipped lines 2, 3, 4, 9
-    for line_num in (2, 3, 4, 9):
-        assert f"Skipping line {line_num}" in captured.err
+    with tm.assert_produces_warning(
+        ParserWarning, match="Skipping line", check_stacklevel=False
+    ):
+        df = parser.read_csv(
+            StringIO(test_input),
+            comment="#",
+            header=None,
+            delimiter="\\s+",
+            skiprows=0,
+            on_bad_lines="warn",
+        )
     expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]])
     tm.assert_frame_equal(df, expected)
 
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 6cadff511d95c..efab9a049a83c 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -662,3 +662,29 @@ def test_header_missing_rows(all_parsers):
     msg = r"Passed header=\[0,1,2\], len of 3, but only 2 lines in file"
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(data), header=[0, 1, 2])
+
+
+@skip_pyarrow
+def test_header_multiple_whitespaces(all_parsers):
+    # GH#54931: a regex whitespace separator must split a header whose
+    # fields are padded with several spaces, even with an indented data row.
+    text = """aa    bb(1,1)   cc(1,1)
+                0  2  3.5"""
+    want = DataFrame({"aa": [0], "bb(1,1)": 2, "cc(1,1)": 3.5})
+    got = all_parsers.read_csv(StringIO(text), sep=r"\s+")
+
+    tm.assert_frame_equal(got, want)
+
+
+@skip_pyarrow
+def test_header_delim_whitespace(all_parsers):
+    # GH#54918: with delim_whitespace=True commas are ordinary characters,
+    # so every line is one field and the header is the single label "a,b".
+    text = """a,b
+1,2
+3,4
+    """
+    want = DataFrame({"a,b": ["1,2", "3,4"]})
+    got = all_parsers.read_csv(StringIO(text), delim_whitespace=True)
+
+    tm.assert_frame_equal(got, want)
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 959b988e208c1..dbd474c6ae0b9 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -274,7 +274,7 @@ def test_multi_char_sep_quotes(python_parser_only, quoting):
             parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
 
 
-def test_none_delimiter(python_parser_only, capsys):
+def test_none_delimiter(python_parser_only):
     # see gh-13374 and gh-17465
     parser = python_parser_only
     data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9"
@@ -283,12 +283,14 @@ def test_none_delimiter(python_parser_only, capsys):
     # We expect the third line in the data to be
     # skipped because it is malformed, but we do
     # not expect any errors to occur.
-    result = parser.read_csv(StringIO(data), header=0, sep=None, on_bad_lines="warn")
+    with tm.assert_produces_warning(
+        ParserWarning, match="Skipping line 3", check_stacklevel=False
+    ):
+        result = parser.read_csv(
+            StringIO(data), header=0, sep=None, on_bad_lines="warn"
+        )
     tm.assert_frame_equal(result, expected)
 
-    captured = capsys.readouterr()
-    assert "Skipping line 3" in captured.err
-
 
 @pytest.mark.parametrize("data", ['a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz'])
 @pytest.mark.parametrize("skipfooter", [0, 1])
diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
index f150ed3903443..e2d785a38eb51 100644
--- a/pandas/tests/io/parser/test_textreader.py
+++ b/pandas/tests/io/parser/test_textreader.py
@@ -12,6 +12,7 @@
 
 import pandas._libs.parsers as parser
 from pandas._libs.parsers import TextReader
+from pandas.errors import ParserWarning
 
 from pandas import DataFrame
 import pandas._testing as tm
@@ -125,7 +126,7 @@ def test_integer_thousands_alt(self):
         expected = DataFrame([123456, 12500])
         tm.assert_frame_equal(result, expected)
 
-    def test_skip_bad_lines(self, capsys):
+    def test_skip_bad_lines(self):
         # too many lines, see #2430 for why
         data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r"
 
@@ -145,14 +146,11 @@ def test_skip_bad_lines(self, capsys):
         }
         assert_array_dicts_equal(result, expected)
 
-        reader = TextReader(
-            StringIO(data), delimiter=":", header=None, on_bad_lines=1  # Warn
-        )
-        reader.read()
-        captured = capsys.readouterr()
-
-        assert "Skipping line 4" in captured.err
-        assert "Skipping line 6" in captured.err
+        with tm.assert_produces_warning(ParserWarning, match="Skipping line"):
+            reader = TextReader(
+                StringIO(data), delimiter=":", header=None, on_bad_lines=1  # Warn
+            )
+            reader.read()
 
     def test_header_not_enough_lines(self):
         data = "skip this\nskip this\na,b,c\n1,2,3\n4,5,6"
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 408348f555f58..1c280f98aee0a 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1015,7 +1015,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list):
     def test_filter_row_groups(self, pa):
         # https://github.com/pandas-dev/pandas/issues/26551
         pytest.importorskip("pyarrow")
-        df = pd.DataFrame({"a": list(range(0, 3))})
+        df = pd.DataFrame({"a": list(range(3))})
         with tm.ensure_clean() as path:
             df.to_parquet(path, engine=pa)
             result = read_parquet(
@@ -1142,6 +1142,25 @@ def test_roundtrip_decimal(self, tmp_path, pa):
         expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
         tm.assert_frame_equal(result, expected)
 
+    def test_infer_string_large_string_type(self, tmp_path, pa):
+        # GH#54798: a large_string column read with future.infer_string on
+        # must come back as string[pyarrow_numpy]; keep ``pa`` unshadowed.
+        import pyarrow
+        import pyarrow.parquet as pq
+
+        path = tmp_path / "large_string.p"
+        values = pyarrow.array([None, "b", "c"], pyarrow.large_string())
+        pq.write_table(pyarrow.table({"a": values}), path)
+
+        with pd.option_context("future.infer_string", True):
+            result = read_parquet(path)
+        expected = pd.DataFrame(
+            data={"a": [None, "b", "c"]},
+            dtype="string[pyarrow_numpy]",
+            columns=pd.Index(["a"], dtype="string[pyarrow_numpy]"),
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):
@@ -1203,7 +1222,7 @@ def test_categorical(self, fp):
         check_round_trip(df, fp)
 
     def test_filter_row_groups(self, fp):
-        d = {"a": list(range(0, 3))}
+        d = {"a": list(range(3))}
         df = pd.DataFrame(d)
         with tm.ensure_clean() as path:
             df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 9ec0ba0b12a76..1abe0ad55a864 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -413,6 +413,8 @@ def mysql_pymysql_engine(iris_path, types_data):
         for entry in types_data:
             entry.pop("DateColWithTz")
         create_and_load_types(engine, types_data, "mysql")
+    if not insp.has_table("iris_view"):
+        create_and_load_iris_view(engine)
     yield engine
     with engine.connect() as conn:
         with conn.begin():
@@ -422,7 +424,7 @@ def mysql_pymysql_engine(iris_path, types_data):
 
 
 @pytest.fixture
-def mysql_pymysql_conn(mysql_pymysql_engine):
+def mysql_pymysql_conn(iris_path, mysql_pymysql_engine):
     with mysql_pymysql_engine.connect() as conn:
         yield conn
 
@@ -440,6 +442,8 @@ def postgresql_psycopg2_engine(iris_path, types_data):
         create_and_load_iris(engine, iris_path, "postgresql")
     if not insp.has_table("types"):
         create_and_load_types(engine, types_data, "postgresql")
+    if not insp.has_table("iris_view"):
+        create_and_load_iris_view(engine)
     yield engine
     with engine.connect() as conn:
         with conn.begin():
@@ -462,9 +466,20 @@ def sqlite_str():
 
 
 @pytest.fixture
-def sqlite_engine(sqlite_str):
+def sqlite_engine(sqlite_str, iris_path, types_data):
     sqlalchemy = pytest.importorskip("sqlalchemy")
     engine = sqlalchemy.create_engine(sqlite_str, poolclass=sqlalchemy.pool.NullPool)
+
+    insp = sqlalchemy.inspect(engine)
+    if not insp.has_table("iris"):
+        create_and_load_iris(engine, iris_path, "sqlite")
+    if not insp.has_table("iris_view"):
+        create_and_load_iris_view(engine)
+    if not insp.has_table("types"):
+        for entry in types_data:
+            entry.pop("DateColWithTz")
+        create_and_load_types(engine, types_data, "sqlite")
+
     yield engine
     engine.dispose()
 
@@ -476,17 +491,25 @@ def sqlite_conn(sqlite_engine):
 
 
 @pytest.fixture
-def sqlite_iris_str(sqlite_str, iris_path):
+def sqlite_iris_str(sqlite_str, iris_path, types_data):
     sqlalchemy = pytest.importorskip("sqlalchemy")
     engine = sqlalchemy.create_engine(sqlite_str)
-    create_and_load_iris(engine, iris_path, "sqlite")
+
+    insp = sqlalchemy.inspect(engine)
+    if not insp.has_table("iris"):
+        create_and_load_iris(engine, iris_path, "sqlite")
+    if not insp.has_table("iris_view"):
+        create_and_load_iris_view(engine)
+    if not insp.has_table("types"):
+        for entry in types_data:
+            entry.pop("DateColWithTz")
+        create_and_load_types(engine, types_data, "sqlite")
     engine.dispose()
     return sqlite_str
 
 
 @pytest.fixture
 def sqlite_iris_engine(sqlite_engine, iris_path):
-    create_and_load_iris(sqlite_engine, iris_path, "sqlite")
     return sqlite_engine
 
 
@@ -499,6 +522,7 @@ def sqlite_iris_conn(sqlite_iris_engine):
 @pytest.fixture
 def sqlite_buildin():
     with contextlib.closing(sqlite3.connect(":memory:")) as closing_conn:
+        create_and_load_iris_view(closing_conn)
         with closing_conn as conn:
             yield conn
 
@@ -1097,6 +1121,7 @@ class PandasSQLTest:
     """
 
     def load_iris_data(self, iris_path):
+        self.drop_view("iris_view", self.conn)
         self.drop_table("iris", self.conn)
         if isinstance(self.conn, sqlite3.Connection):
             create_and_load_iris_sqlite3(self.conn, iris_path)
@@ -1116,18 +1141,21 @@ def load_types_data(self, types_data):
     def _read_sql_iris_parameter(self, sql_strings):
         query = sql_strings["read_parameters"][self.flavor]
         params = ("Iris-setosa", 5.1)
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_named_parameter(self, sql_strings):
         query = sql_strings["read_named_parameters"][self.flavor]
         params = {"name": "Iris-setosa", "length": 5.1}
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_no_parameter_with_percent(self, sql_strings):
         query = sql_strings["read_no_parameters_with_percent"][self.flavor]
-        iris_frame = self.pandasSQL.read_query(query, params=None)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=None)
         check_iris_frame(iris_frame)
 
     def _to_sql_empty(self, test_frame1):
@@ -1157,7 +1185,8 @@ def _to_sql_with_sql_engine(self, test_frame1, engine="auto", **engine_kwargs):
     def _roundtrip(self, test_frame1):
         self.drop_table("test_frame_roundtrip", self.conn)
         assert self.pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4
-        result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
+        with self.pandasSQL.run_transaction():
+            result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
 
         result.set_index("level_0", inplace=True)
         # result.index.astype(int)
@@ -1207,13 +1236,14 @@ class DummyException(Exception):
         except DummyException:
             # ignore raised exception
             pass
-        res = self.pandasSQL.read_query("SELECT * FROM test_trans")
+        with self.pandasSQL.run_transaction():
+            res = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res) == 0
 
         # Make sure when transaction is committed, rows do get inserted
         with self.pandasSQL.run_transaction() as trans:
             trans.execute(ins_sql)
-        res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
+            res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res2) == 1
 
 
@@ -1221,470 +1251,695 @@ class DummyException(Exception):
 # -- Testing the public API
 
 
-class _TestSQLApi(PandasSQLTest):
-    """
-    Base class to test the public API.
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable_iris)
+def test_api_read_sql_view(conn, request):  # read_sql_query against iris_view
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    iris_frame = sql.read_sql_query("SELECT * FROM iris_view", conn)
+    check_iris_frame(iris_frame)
 
-    From this two classes are derived to run these tests for both the
-    sqlalchemy mode (`TestSQLApi`) and the fallback mode
-    (`TestSQLiteFallbackApi`).  These tests are run with sqlite3. Specific
-    tests for the different sql flavours are included in `_TestSQLAlchemy`.
 
-    Notes:
-    flavor can always be passed even in SQLAlchemy mode,
-    should be correctly ignored.
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable_iris)
+def test_api_read_sql_with_chunksize_no_result(conn, request):  # chunked == plain
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    query = 'SELECT * FROM iris_view WHERE "SepalLength" < 0.0'
+    with_batch = sql.read_sql_query(query, conn, chunksize=5)  # chunked read
+    without_batch = sql.read_sql_query(query, conn)  # single-shot read
+    tm.assert_frame_equal(concat(with_batch), without_batch)
 
-    we don't use drop_table because that isn't part of the public api
 
-    """
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql(conn, request, test_frame1):  # to_sql creates the table
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame1", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame1")
 
-    flavor = "sqlite"
-    mode: str
+    sql.to_sql(test_frame1, "test_frame1", conn)
+    assert sql.has_table("test_frame1", conn)
 
-    @pytest.fixture(autouse=True)
-    def setup_method(self, iris_path, types_data):
-        self.conn = self.connect()
-        self.load_iris_data(iris_path)
-        self.load_types_data(types_data)
-        self.load_test_data_and_sql()
 
-    def load_test_data_and_sql(self):
-        create_and_load_iris_view(self.conn)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_fail(conn, request, test_frame1):  # if_exists="fail" semantics
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame2", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame2")
 
-    def test_read_sql_view(self):
-        iris_frame = sql.read_sql_query("SELECT * FROM iris_view", self.conn)
-        check_iris_frame(iris_frame)
+    sql.to_sql(test_frame1, "test_frame2", conn, if_exists="fail")  # first write: ok
+    assert sql.has_table("test_frame2", conn)
 
-    def test_read_sql_with_chunksize_no_result(self):
-        query = "SELECT * FROM iris_view WHERE SepalLength < 0.0"
-        with_batch = sql.read_sql_query(query, self.conn, chunksize=5)
-        without_batch = sql.read_sql_query(query, self.conn)
-        tm.assert_frame_equal(concat(with_batch), without_batch)
+    msg = "Table 'test_frame2' already exists"
+    with pytest.raises(ValueError, match=msg):  # second write must be refused
+        sql.to_sql(test_frame1, "test_frame2", conn, if_exists="fail")
 
-    def test_to_sql(self, test_frame1):
-        sql.to_sql(test_frame1, "test_frame1", self.conn)
-        assert sql.has_table("test_frame1", self.conn)
 
-    def test_to_sql_fail(self, test_frame1):
-        sql.to_sql(test_frame1, "test_frame2", self.conn, if_exists="fail")
-        assert sql.has_table("test_frame2", self.conn)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_replace(conn, request, test_frame1):  # if_exists="replace"
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame3", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame3")
 
-        msg = "Table 'test_frame2' already exists"
-        with pytest.raises(ValueError, match=msg):
-            sql.to_sql(test_frame1, "test_frame2", self.conn, if_exists="fail")
+    sql.to_sql(test_frame1, "test_frame3", conn, if_exists="fail")
+    # Write the same frame again, this time with if_exists="replace"
+    sql.to_sql(test_frame1, "test_frame3", conn, if_exists="replace")
+    assert sql.has_table("test_frame3", conn)
 
-    def test_to_sql_replace(self, test_frame1):
-        sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="fail")
-        # Add to table again
-        sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="replace")
-        assert sql.has_table("test_frame3", self.conn)
+    num_entries = len(test_frame1)  # one copy of the frame, not two
+    num_rows = count_rows(conn, "test_frame3")
 
-        num_entries = len(test_frame1)
-        num_rows = count_rows(self.conn, "test_frame3")
+    assert num_rows == num_entries
 
-        assert num_rows == num_entries
 
-    def test_to_sql_append(self, test_frame1):
-        assert sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="fail") == 4
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_append(conn, request, test_frame1):  # if_exists="append"
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame4", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame4")
 
-        # Add to table again
-        assert (
-            sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="append") == 4
-        )
-        assert sql.has_table("test_frame4", self.conn)
+    assert sql.to_sql(test_frame1, "test_frame4", conn, if_exists="fail") == 4
 
-        num_entries = 2 * len(test_frame1)
-        num_rows = count_rows(self.conn, "test_frame4")
+    # Append the same frame to the existing table
+    assert sql.to_sql(test_frame1, "test_frame4", conn, if_exists="append") == 4
+    assert sql.has_table("test_frame4", conn)
 
-        assert num_rows == num_entries
+    num_entries = 2 * len(test_frame1)  # both writes must be present
+    num_rows = count_rows(conn, "test_frame4")
 
-    def test_to_sql_type_mapping(self, test_frame3):
-        sql.to_sql(test_frame3, "test_frame5", self.conn, index=False)
-        result = sql.read_sql("SELECT * FROM test_frame5", self.conn)
+    assert num_rows == num_entries
 
-        tm.assert_frame_equal(test_frame3, result)
 
-    def test_to_sql_series(self):
-        s = Series(np.arange(5, dtype="int64"), name="series")
-        sql.to_sql(s, "test_series", self.conn, index=False)
-        s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
-        tm.assert_frame_equal(s.to_frame(), s2)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_type_mapping(conn, request, test_frame3):  # write/read equality
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame5", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame5")
 
-    def test_roundtrip(self, test_frame1):
-        sql.to_sql(test_frame1, "test_frame_roundtrip", con=self.conn)
-        result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn)
+    sql.to_sql(test_frame3, "test_frame5", conn, index=False)  # index not written
+    result = sql.read_sql("SELECT * FROM test_frame5", conn)
 
-        # HACK!
-        result.index = test_frame1.index
-        result.set_index("level_0", inplace=True)
-        result.index.astype(int)
-        result.index.name = None
-        tm.assert_frame_equal(result, test_frame1)
+    tm.assert_frame_equal(test_frame3, result)
 
-    def test_roundtrip_chunksize(self, test_frame1):
-        sql.to_sql(
-            test_frame1,
-            "test_frame_roundtrip",
-            con=self.conn,
-            index=False,
-            chunksize=2,
-        )
-        result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn)
-        tm.assert_frame_equal(result, test_frame1)
 
-    def test_execute_sql(self):
-        # drop_sql = "DROP TABLE IF EXISTS test"  # should already be done
-        with sql.pandasSQL_builder(self.conn) as pandas_sql:
-            iris_results = pandas_sql.execute("SELECT * FROM iris")
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_series(conn, request):  # a Series is written like a 1-col frame
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_series", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_series")
+
+    s = Series(np.arange(5, dtype="int64"), name="series")
+    sql.to_sql(s, "test_series", conn, index=False)
+    s2 = sql.read_sql_query("SELECT * FROM test_series", conn)
+    tm.assert_frame_equal(s.to_frame(), s2)  # read-back equals s.to_frame()
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_roundtrip(conn, request, test_frame1):  # to_sql -> read_sql_query
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame_roundtrip", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame_roundtrip")
+
+    sql.to_sql(test_frame1, "test_frame_roundtrip", con=conn)
+    result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=conn)
+
+    # HACK: rebuild the index from the "level_0" column before comparing
+    result.index = test_frame1.index  # replaced again by set_index just below
+    result.set_index("level_0", inplace=True)
+    result.index.astype(int)  # NOTE(review): return value discarded, so a no-op
+    result.index.name = None
+    tm.assert_frame_equal(result, test_frame1)
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_roundtrip_chunksize(conn, request, test_frame1):  # chunked to_sql
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_frame_roundtrip", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_frame_roundtrip")
+
+    sql.to_sql(
+        test_frame1,
+        "test_frame_roundtrip",
+        con=conn,
+        index=False,
+        chunksize=2,
+    )
+    result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=conn)
+    tm.assert_frame_equal(result, test_frame1)  # chunked write reads back unchanged
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable_iris)
+def test_api_execute_sql(conn, request):
+    # Execute a raw SELECT via pandasSQL_builder and check the first fetched row.
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    with sql.pandasSQL_builder(conn) as pandas_sql:
+        iris_results = pandas_sql.execute("SELECT * FROM iris")
         row = iris_results.fetchone()
-        tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
+    assert tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
 
-    def test_date_parsing(self):
-        # Test date parsing in read_sql
-        # No Parsing
-        df = sql.read_sql_query("SELECT * FROM types", self.conn)
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_date_parsing(conn, request):  # parse_dates forms of read_sql_query
+    conn_name = conn  # keep the fixture name for the backend checks below
+    if conn_name in {"sqlite_buildin", "sqlite_str"}:  # no types table there
+        pytest.skip("types tables not created in sqlite_buildin or sqlite_str fixture")
+
+    conn = request.getfixturevalue(conn)
+    # Baseline without parse_dates: DateCol must not come back as datetime64;
+    # that check is skipped when the fixture name contains "mysql"/"postgres".
+    df = sql.read_sql_query("SELECT * FROM types", conn)
+    if not ("mysql" in conn_name or "postgres" in conn_name):
         assert not issubclass(df.DateCol.dtype.type, np.datetime64)
 
-        df = sql.read_sql_query(
-            "SELECT * FROM types", self.conn, parse_dates=["DateCol"]
-        )
-        assert issubclass(df.DateCol.dtype.type, np.datetime64)
-        assert df.DateCol.tolist() == [
-            Timestamp(2000, 1, 3, 0, 0, 0),
-            Timestamp(2000, 1, 4, 0, 0, 0),
-        ]
+    df = sql.read_sql_query("SELECT * FROM types", conn, parse_dates=["DateCol"])
+    assert issubclass(df.DateCol.dtype.type, np.datetime64)  # list of column names
+    assert df.DateCol.tolist() == [
+        Timestamp(2000, 1, 3, 0, 0, 0),
+        Timestamp(2000, 1, 4, 0, 0, 0),
+    ]
 
-        df = sql.read_sql_query(
-            "SELECT * FROM types",
-            self.conn,
-            parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"},
-        )
-        assert issubclass(df.DateCol.dtype.type, np.datetime64)
-        assert df.DateCol.tolist() == [
-            Timestamp(2000, 1, 3, 0, 0, 0),
-            Timestamp(2000, 1, 4, 0, 0, 0),
-        ]
+    df = sql.read_sql_query(
+        "SELECT * FROM types",
+        conn,
+        parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"},
+    )
+    assert issubclass(df.DateCol.dtype.type, np.datetime64)  # column -> format str
+    assert df.DateCol.tolist() == [
+        Timestamp(2000, 1, 3, 0, 0, 0),
+        Timestamp(2000, 1, 4, 0, 0, 0),
+    ]
 
-        df = sql.read_sql_query(
-            "SELECT * FROM types", self.conn, parse_dates=["IntDateCol"]
-        )
-        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
-        assert df.IntDateCol.tolist() == [
-            Timestamp(1986, 12, 25, 0, 0, 0),
-            Timestamp(2013, 1, 1, 0, 0, 0),
-        ]
+    df = sql.read_sql_query("SELECT * FROM types", conn, parse_dates=["IntDateCol"])
+    assert issubclass(df.IntDateCol.dtype.type, np.datetime64)  # list form again
+    assert df.IntDateCol.tolist() == [
+        Timestamp(1986, 12, 25, 0, 0, 0),
+        Timestamp(2013, 1, 1, 0, 0, 0),
+    ]
 
-        df = sql.read_sql_query(
-            "SELECT * FROM types", self.conn, parse_dates={"IntDateCol": "s"}
-        )
-        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
-        assert df.IntDateCol.tolist() == [
-            Timestamp(1986, 12, 25, 0, 0, 0),
-            Timestamp(2013, 1, 1, 0, 0, 0),
-        ]
+    df = sql.read_sql_query(
+        "SELECT * FROM types", conn, parse_dates={"IntDateCol": "s"}
+    )
+    assert issubclass(df.IntDateCol.dtype.type, np.datetime64)  # dict value "s"
+    assert df.IntDateCol.tolist() == [
+        Timestamp(1986, 12, 25, 0, 0, 0),
+        Timestamp(2013, 1, 1, 0, 0, 0),
+    ]
 
-        df = sql.read_sql_query(
+    df = sql.read_sql_query(
+        "SELECT * FROM types",
+        conn,
+        parse_dates={"IntDateOnlyCol": "%Y%m%d"},
+    )
+    assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)  # "%Y%m%d" form
+    assert df.IntDateOnlyCol.tolist() == [
+        Timestamp("2010-10-10"),
+        Timestamp("2010-12-12"),
+    ]
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+@pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
+@pytest.mark.parametrize(
+    "read_sql, text, mode",
+    [
+        (sql.read_sql, "SELECT * FROM types", ("sqlalchemy", "fallback")),
+        (sql.read_sql, "types", ("sqlalchemy",)),
+        (
+            sql.read_sql_query,
             "SELECT * FROM types",
-            self.conn,
-            parse_dates={"IntDateOnlyCol": "%Y%m%d"},
-        )
-        assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
-        assert df.IntDateOnlyCol.tolist() == [
-            Timestamp("2010-10-10"),
-            Timestamp("2010-12-12"),
-        ]
+            ("sqlalchemy", "fallback"),
+        ),
+        (sql.read_sql_table, "types", ("sqlalchemy",)),
+    ],
+)
+def test_api_custom_dateparsing_error(
+    conn, request, read_sql, text, mode, error, types_data_frame  # mode is unused
+):
+    conn_name = conn  # keep the fixture name for the backend checks below
+    if conn_name in {"sqlite_buildin", "sqlite_str"}:  # no types table there
+        pytest.skip("types tables not created in sqlite_buildin or sqlite_str fixture")
 
-    @pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
-    @pytest.mark.parametrize(
-        "read_sql, text, mode",
-        [
-            (sql.read_sql, "SELECT * FROM types", ("sqlalchemy", "fallback")),
-            (sql.read_sql, "types", ("sqlalchemy")),
-            (
-                sql.read_sql_query,
-                "SELECT * FROM types",
-                ("sqlalchemy", "fallback"),
-            ),
-            (sql.read_sql_table, "types", ("sqlalchemy")),
-        ],
+    conn = request.getfixturevalue(conn)
+
+    expected = types_data_frame.astype({"DateCol": "datetime64[ns]"})
+
+    result = read_sql(
+        text,
+        con=conn,
+        parse_dates={
+            "DateCol": {"errors": error},
+        },
     )
-    def test_custom_dateparsing_error(
-        self, read_sql, text, mode, error, types_data_frame
-    ):
-        if self.mode in mode:
-            expected = types_data_frame.astype({"DateCol": "datetime64[ns]"})
+    if "postgres" in conn_name:  # align the postgres result with the fixture frame
+        # TODO: clean up types_data_frame fixture
+        result = result.drop(columns=["DateColWithTz"])
+        result["BoolCol"] = result["BoolCol"].astype(int)
+        result["BoolColWithNull"] = result["BoolColWithNull"].astype(float)
 
-            result = read_sql(
-                text,
-                con=self.conn,
-                parse_dates={
-                    "DateCol": {"errors": error},
-                },
-            )
+    tm.assert_frame_equal(result, expected)
 
-            tm.assert_frame_equal(result, expected)
 
-    def test_date_and_index(self):
-        # Test case where same column appears in parse_date and index_col
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_date_and_index(conn, request):
+    # Test case where the same column appears in both parse_dates and index_col
+    conn_name = conn  # keep the fixture name for the skip check below
+    if conn_name in {"sqlite_buildin", "sqlite_str"}:  # no types table there
+        pytest.skip("types tables not created in sqlite_buildin or sqlite_str fixture")
 
-        df = sql.read_sql_query(
-            "SELECT * FROM types",
-            self.conn,
-            index_col="DateCol",
-            parse_dates=["DateCol", "IntDateCol"],
-        )
+    conn = request.getfixturevalue(conn)
+    df = sql.read_sql_query(
+        "SELECT * FROM types",
+        conn,
+        index_col="DateCol",
+        parse_dates=["DateCol", "IntDateCol"],
+    )
 
-        assert issubclass(df.index.dtype.type, np.datetime64)
-        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+    assert issubclass(df.index.dtype.type, np.datetime64)  # DateCol became the index
+    assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
 
-    def test_timedelta(self):
-        # see #6921
-        df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame()
-        with tm.assert_produces_warning(UserWarning):
-            result_count = df.to_sql(name="test_timedelta", con=self.conn)
-        assert result_count == 2
-        result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn)
-        tm.assert_series_equal(result["foo"], df["foo"].view("int64"))
-
-    def test_complex_raises(self):
-        df = DataFrame({"a": [1 + 1j, 2j]})
-        msg = "Complex datatypes not supported"
-        with pytest.raises(ValueError, match=msg):
-            assert df.to_sql("test_complex", con=self.conn) is None
 
-    @pytest.mark.parametrize(
-        "index_name,index_label,expected",
-        [
-            # no index name, defaults to 'index'
-            (None, None, "index"),
-            # specifying index_label
-            (None, "other_label", "other_label"),
-            # using the index name
-            ("index_name", None, "index_name"),
-            # has index name, but specifying index_label
-            ("index_name", "other_label", "other_label"),
-            # index name is integer
-            (0, None, "0"),
-            # index name is None but index label is integer
-            (None, 0, "0"),
-        ],
-    )
-    def test_to_sql_index_label(self, index_name, index_label, expected):
-        temp_frame = DataFrame({"col1": range(4)})
-        temp_frame.index.name = index_name
-        query = "SELECT * FROM test_index_label"
-        sql.to_sql(temp_frame, "test_index_label", self.conn, index_label=index_label)
-        frame = sql.read_sql_query(query, self.conn)
-        assert frame.columns[0] == expected
-
-    def test_to_sql_index_label_multiindex(self):
-        expected_row_count = 4
-        temp_frame = DataFrame(
-            {"col1": range(4)},
-            index=MultiIndex.from_product([("A0", "A1"), ("B0", "B1")]),
-        )
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_timedelta(conn, request):
+    # see #6921: timedelta data is written with a UserWarning, read back as int64
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_timedelta", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_timedelta")
 
-        # no index name, defaults to 'level_0' and 'level_1'
-        result = sql.to_sql(temp_frame, "test_index_label", self.conn)
-        assert result == expected_row_count
-        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
-        assert frame.columns[0] == "level_0"
-        assert frame.columns[1] == "level_1"
+    df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame()
+    with tm.assert_produces_warning(UserWarning):
+        result_count = df.to_sql(name="test_timedelta", con=conn)
+    assert result_count == 2
+    result = sql.read_sql_query("SELECT * FROM test_timedelta", conn)
+    tm.assert_series_equal(result["foo"], df["foo"].view("int64"))
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_complex_raises(conn, request):  # complex columns are rejected by to_sql
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    df = DataFrame({"a": [1 + 1j, 2j]})
+    msg = "Complex datatypes not supported"
+    with pytest.raises(ValueError, match=msg):
+        df.to_sql("test_complex", con=conn)  # must raise; no return value to check
 
-        # specifying index_label
-        result = sql.to_sql(
-            temp_frame,
-            "test_index_label",
-            self.conn,
-            if_exists="replace",
-            index_label=["A", "B"],
-        )
-        assert result == expected_row_count
-        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
-        assert frame.columns[:2].tolist() == ["A", "B"]
 
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+@pytest.mark.parametrize(
+    "index_name,index_label,expected",
+    [
+        # no index name, defaults to 'index'
+        (None, None, "index"),
+        # specifying index_label
+        (None, "other_label", "other_label"),
         # using the index name
-        temp_frame.index.names = ["A", "B"]
-        result = sql.to_sql(
-            temp_frame, "test_index_label", self.conn, if_exists="replace"
+        ("index_name", None, "index_name"),
+        # has index name, but specifying index_label
+        ("index_name", "other_label", "other_label"),
+        # index name is integer
+        (0, None, "0"),
+        # index name is None but index label is integer
+        (None, 0, "0"),
+    ],
+)
+def test_api_to_sql_index_label(conn, request, index_name, index_label, expected):
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_index_label", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_index_label")
+
+    temp_frame = DataFrame({"col1": range(4)})
+    temp_frame.index.name = index_name
+    query = "SELECT * FROM test_index_label"
+    sql.to_sql(temp_frame, "test_index_label", conn, index_label=index_label)
+    frame = sql.read_sql_query(query, conn)
+    assert frame.columns[0] == expected  # first column read back is the index
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_to_sql_index_label_multiindex(conn, request):  # MultiIndex labels
+    conn_name = conn  # keep the fixture name for the backend check below
+    if "mysql" in conn_name:  # marked xfail for MySQL fixtures (see reason)
+        request.node.add_marker(
+            pytest.mark.xfail(reason="MySQL can fail using TEXT without length as key")
         )
-        assert result == expected_row_count
-        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
-        assert frame.columns[:2].tolist() == ["A", "B"]
 
-        # has index name, but specifying index_label
-        result = sql.to_sql(
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_index_label", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_index_label")
+
+    expected_row_count = 4
+    temp_frame = DataFrame(
+        {"col1": range(4)},
+        index=MultiIndex.from_product([("A0", "A1"), ("B0", "B1")]),
+    )
+
+    # no index name, defaults to 'level_0' and 'level_1'
+    result = sql.to_sql(temp_frame, "test_index_label", conn)
+    assert result == expected_row_count
+    frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
+    assert frame.columns[0] == "level_0"
+    assert frame.columns[1] == "level_1"
+
+    # specifying index_label
+    result = sql.to_sql(
+        temp_frame,
+        "test_index_label",
+        conn,
+        if_exists="replace",
+        index_label=["A", "B"],
+    )
+    assert result == expected_row_count
+    frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
+    assert frame.columns[:2].tolist() == ["A", "B"]
+
+    # using the index name
+    temp_frame.index.names = ["A", "B"]
+    result = sql.to_sql(temp_frame, "test_index_label", conn, if_exists="replace")
+    assert result == expected_row_count
+    frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
+    assert frame.columns[:2].tolist() == ["A", "B"]
+
+    # has index name, but specifying index_label
+    result = sql.to_sql(
+        temp_frame,
+        "test_index_label",
+        conn,
+        if_exists="replace",
+        index_label=["C", "D"],
+    )
+    assert result == expected_row_count
+    frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
+    assert frame.columns[:2].tolist() == ["C", "D"]
+
+    msg = "Length of 'index_label' should match number of levels, which is 2"
+    with pytest.raises(ValueError, match=msg):  # one label given for two levels
+        sql.to_sql(
             temp_frame,
             "test_index_label",
-            self.conn,
+            conn,
             if_exists="replace",
-            index_label=["C", "D"],
+            index_label="C",
         )
-        assert result == expected_row_count
-        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
-        assert frame.columns[:2].tolist() == ["C", "D"]
 
-        msg = "Length of 'index_label' should match number of levels, which is 2"
-        with pytest.raises(ValueError, match=msg):
-            sql.to_sql(
-                temp_frame,
-                "test_index_label",
-                self.conn,
-                if_exists="replace",
-                index_label="C",
-            )
 
-    def test_multiindex_roundtrip(self):
-        df = DataFrame.from_records(
-            [(1, 2.1, "line1"), (2, 1.5, "line2")],
-            columns=["A", "B", "C"],
-            index=["A", "B"],
-        )
-
-        df.to_sql(name="test_multiindex_roundtrip", con=self.conn)
-        result = sql.read_sql_query(
-            "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"]
-        )
-        tm.assert_frame_equal(df, result, check_index_type=True)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_multiindex_roundtrip(conn, request):  # two-level index survives
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    if sql.has_table("test_multiindex_roundtrip", conn):  # drop any stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_multiindex_roundtrip")
+
+    df = DataFrame.from_records(
+        [(1, 2.1, "line1"), (2, 1.5, "line2")],
+        columns=["A", "B", "C"],
+        index=["A", "B"],
+    )
 
-    @pytest.mark.parametrize(
-        "dtype",
-        [
-            None,
-            int,
-            float,
-            {"A": int, "B": float},
-        ],
+    df.to_sql(name="test_multiindex_roundtrip", con=conn)
+    result = sql.read_sql_query(
+        "SELECT * FROM test_multiindex_roundtrip", conn, index_col=["A", "B"]
     )
-    def test_dtype_argument(self, dtype):
-        # GH10285 Add dtype argument to read_sql_query
-        df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"])
-        assert df.to_sql(name="test_dtype_argument", con=self.conn) == 2
-
-        expected = df.astype(dtype)
-        result = sql.read_sql_query(
-            "SELECT A, B FROM test_dtype_argument", con=self.conn, dtype=dtype
-        )
+    tm.assert_frame_equal(df, result, check_index_type=True)
 
-        tm.assert_frame_equal(result, expected)
 
-    def test_integer_col_names(self):
-        df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
-        sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace")
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        None,
+        int,
+        float,
+        {"A": int, "B": float},
+    ],
+)
+def test_api_dtype_argument(conn, request, dtype):
+    # GH10285 Add dtype argument to read_sql_query
+    conn_name = conn  # keep the fixture name for the backend check below
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_dtype_argument", conn):  # start without a stale table
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_dtype_argument")
 
-    def test_get_schema(self, test_frame1):
-        create_sql = sql.get_schema(test_frame1, "test", con=self.conn)
-        assert "CREATE" in create_sql
+    df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"])
+    assert df.to_sql(name="test_dtype_argument", con=conn) == 2
 
-    def test_get_schema_with_schema(self, test_frame1):
-        # GH28486
-        create_sql = sql.get_schema(test_frame1, "test", con=self.conn, schema="pypi")
-        assert "CREATE TABLE pypi." in create_sql
+    expected = df.astype(dtype)  # same cast the read is asked to apply
 
-    def test_get_schema_dtypes(self):
-        if self.mode == "sqlalchemy":
-            from sqlalchemy import Integer
+    if "postgres" in conn_name:  # postgres variant double-quotes the column names
+        query = 'SELECT "A", "B" FROM test_dtype_argument'
+    else:
+        query = "SELECT A, B FROM test_dtype_argument"
+    result = sql.read_sql_query(query, con=conn, dtype=dtype)
 
-            dtype = Integer
-        else:
-            dtype = "INTEGER"
+    tm.assert_frame_equal(result, expected)
 
-        float_frame = DataFrame({"a": [1.1, 1.2], "b": [2.1, 2.2]})
-        create_sql = sql.get_schema(
-            float_frame, "test", con=self.conn, dtype={"b": dtype}
-        )
-        assert "CREATE" in create_sql
-        assert "INTEGER" in create_sql
 
-    def test_get_schema_keys(self, test_frame1):
-        frame = DataFrame({"Col1": [1.1, 1.2], "Col2": [2.1, 2.2]})
-        create_sql = sql.get_schema(frame, "test", con=self.conn, keys="Col1")
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_integer_col_names(conn, request):  # smoke test: only must not raise
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])  # integer column labels
+    sql.to_sql(df, "test_frame_integer_col_names", conn, if_exists="replace")
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_get_schema(conn, request, test_frame1):  # get_schema emits a CREATE
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    create_sql = sql.get_schema(test_frame1, "test", con=conn)
+    assert "CREATE" in create_sql
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_get_schema_with_schema(conn, request, test_frame1):
+    # GH28486: the schema argument must prefix the table name in the DDL
+    conn = request.getfixturevalue(conn)  # fixture name -> fixture value
+    create_sql = sql.get_schema(test_frame1, "test", con=conn, schema="pypi")
+    assert "CREATE TABLE pypi." in create_sql
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_get_schema_dtypes(conn, request):  # dtype override shows up in DDL
+    conn_name = conn  # keep the fixture name for the backend check below
+    conn = request.getfixturevalue(conn)
+    float_frame = DataFrame({"a": [1.1, 1.2], "b": [2.1, 2.2]})
+
+    if conn_name == "sqlite_buildin":  # this fixture gets the type as a string
+        dtype = "INTEGER"
+    else:
+        from sqlalchemy import Integer
+
+        dtype = Integer
+    create_sql = sql.get_schema(float_frame, "test", con=conn, dtype={"b": dtype})
+    assert "CREATE" in create_sql
+    assert "INTEGER" in create_sql  # float column "b" was declared as INTEGER
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_get_schema_keys(conn, request, test_frame1):  # keys= -> PRIMARY KEY
+    conn_name = conn  # keep the fixture name for the backend checks below
+    conn = request.getfixturevalue(conn)
+    frame = DataFrame({"Col1": [1.1, 1.2], "Col2": [2.1, 2.2]})
+    create_sql = sql.get_schema(frame, "test", con=conn, keys="Col1")
+
+    if "mysql" in conn_name:  # backticks expected for MySQL, double quotes otherwise
+        constraint_sentence = "CONSTRAINT test_pk PRIMARY KEY (`Col1`)"
+    else:
         constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")'
-        assert constraint_sentence in create_sql
+    assert constraint_sentence in create_sql
 
-        # multiple columns as key (GH10385)
-        create_sql = sql.get_schema(test_frame1, "test", con=self.conn, keys=["A", "B"])
+    # multiple columns as key (GH10385)
+    create_sql = sql.get_schema(test_frame1, "test", con=conn, keys=["A", "B"])
+    if "mysql" in conn_name:  # same quoting split as above
+        constraint_sentence = "CONSTRAINT test_pk PRIMARY KEY (`A`, `B`)"
+    else:
         constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")'
-        assert constraint_sentence in create_sql
+    assert constraint_sentence in create_sql
 
-    def test_chunksize_read(self):
-        df = DataFrame(
-            np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde")
-        )
-        df.to_sql(name="test_chunksize", con=self.conn, index=False)
 
-        # reading the query in one time
-        res1 = sql.read_sql_query("select * from test_chunksize", self.conn)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_chunksize_read(conn, request):
+    conn_name = conn
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_chunksize", conn):
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_chunksize")
+
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde")
+    )
+    df.to_sql(name="test_chunksize", con=conn, index=False)
+
+    # reading the query in one time
+    res1 = sql.read_sql_query("select * from test_chunksize", conn)
+
+    # reading the query in chunks with read_sql_query
+    res2 = DataFrame()
+    i = 0
+    sizes = [5, 5, 5, 5, 2]
+
+    for chunk in sql.read_sql_query("select * from test_chunksize", conn, chunksize=5):
+        res2 = concat([res2, chunk], ignore_index=True)
+        assert len(chunk) == sizes[i]
+        i += 1
+
+    tm.assert_frame_equal(res1, res2)
 
-        # reading the query in chunks with read_sql_query
-        res2 = DataFrame()
+    # reading the table in chunks with read_sql_table (sqlite_buildin must raise)
+    if conn_name == "sqlite_buildin":
+        with pytest.raises(NotImplementedError, match=""):
+            sql.read_sql_table("test_chunksize", conn, chunksize=5)
+    else:
+        res3 = DataFrame()
         i = 0
         sizes = [5, 5, 5, 5, 2]
 
-        for chunk in sql.read_sql_query(
-            "select * from test_chunksize", self.conn, chunksize=5
-        ):
-            res2 = concat([res2, chunk], ignore_index=True)
+        for chunk in sql.read_sql_table("test_chunksize", conn, chunksize=5):
+            res3 = concat([res3, chunk], ignore_index=True)
             assert len(chunk) == sizes[i]
             i += 1
 
-        tm.assert_frame_equal(res1, res2)
+        tm.assert_frame_equal(res1, res3)
 
-        # reading the query in chunks with read_sql_query
-        if self.mode == "sqlalchemy":
-            res3 = DataFrame()
-            i = 0
-            sizes = [5, 5, 5, 5, 2]
 
-            for chunk in sql.read_sql_table("test_chunksize", self.conn, chunksize=5):
-                res3 = concat([res3, chunk], ignore_index=True)
-                assert len(chunk) == sizes[i]
-                i += 1
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_categorical(conn, request):
+    # GH8624
+    # test that categorical gets written correctly as dense column
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_categorical", conn):
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_categorical")
 
-            tm.assert_frame_equal(res1, res3)
+    df = DataFrame(
+        {
+            "person_id": [1, 2, 3],
+            "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"],
+        }
+    )
+    df2 = df.copy()
+    df2["person_name"] = df2["person_name"].astype("category")
 
-    def test_categorical(self):
-        # GH8624
-        # test that categorical gets written correctly as dense column
-        df = DataFrame(
-            {
-                "person_id": [1, 2, 3],
-                "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"],
-            }
-        )
-        df2 = df.copy()
-        df2["person_name"] = df2["person_name"].astype("category")
+    df2.to_sql(name="test_categorical", con=conn, index=False)
+    res = sql.read_sql_query("SELECT * FROM test_categorical", conn)
 
-        df2.to_sql(name="test_categorical", con=self.conn, index=False)
-        res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn)
+    tm.assert_frame_equal(res, df)
 
-        tm.assert_frame_equal(res, df)
 
-    def test_unicode_column_name(self):
-        # GH 11431
-        df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"])
-        df.to_sql(name="test_unicode", con=self.conn, index=False)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_unicode_column_name(conn, request):
+    # GH 11431: to_sql must accept a non-ASCII column label ("\xe9")
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_unicode", conn):
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_unicode")
 
-    def test_escaped_table_name(self):
-        # GH 13206
-        df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]})
-        df.to_sql(name="d1187b08-4943-4c8d-a7f6", con=self.conn, index=False)
+    df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"])
+    df.to_sql(name="test_unicode", con=conn, index=False)
 
-        res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn)
 
-        tm.assert_frame_equal(res, df)
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_escaped_table_name(conn, request):
+    # GH 13206: round-trip a table whose name contains dashes and needs quoting
+    conn_name = conn
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("d1187b08-4943-4c8d-a7f6", conn):
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("d1187b08-4943-4c8d-a7f6")
 
-    def test_read_sql_duplicate_columns(self):
-        # GH#53117
-        df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1})
-        df.to_sql(name="test_table", con=self.conn, index=False)
+    df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]})
+    df.to_sql(name="d1187b08-4943-4c8d-a7f6", con=conn, index=False)
 
-        result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn)
-        expected = DataFrame(
-            [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]],
-            columns=["a", "b", "a", "c"],
-        )
-        tm.assert_frame_equal(result, expected)
+    if "postgres" in conn_name:
+        query = 'SELECT * FROM "d1187b08-4943-4c8d-a7f6"'
+    else:
+        query = "SELECT * FROM `d1187b08-4943-4c8d-a7f6`"
+    res = sql.read_sql_query(query, conn)
+
+    tm.assert_frame_equal(res, df)
+
+
+@pytest.mark.db
+@pytest.mark.parametrize("conn", all_connectable)
+def test_api_read_sql_duplicate_columns(conn, request):
+    # GH#53117: duplicate column labels in the result set must be preserved
+    conn = request.getfixturevalue(conn)
+    if sql.has_table("test_table", conn):
+        with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
+            pandasSQL.drop_table("test_table")
+
+    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1})
+    df.to_sql(name="test_table", con=conn, index=False)
+
+    result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", conn)
+    expected = DataFrame(
+        [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]],
+        columns=["a", "b", "a", "c"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+class _TestSQLApi(PandasSQLTest):
+    """
+    Base class to test the public API.
+
+    Two classes are derived from this one so the tests run in both the
+    sqlalchemy mode (`TestSQLApi`) and the fallback mode
+    (`TestSQLiteFallbackApi`).  These tests are run with sqlite3. Specific
+    tests for the different sql flavours are included in `_TestSQLAlchemy`.
+
+    Notes:
+    flavor can always be passed, even in SQLAlchemy mode, where it
+    should be correctly ignored.
+
+    We don't use drop_table because that isn't part of the public API.
+
+    """
+
+    flavor = "sqlite"
+    mode: str
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, iris_path, types_data):
+        self.conn = self.connect()
+        self.load_iris_data(iris_path)
+        self.load_types_data(types_data)
+        self.load_test_data_and_sql()
+
+    def load_test_data_and_sql(self):
+        create_and_load_iris_view(self.conn)
 
 
 @pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed")
@@ -2962,6 +3217,13 @@ def test_read_sql_string_inference(self):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_roundtripping_datetimes(self):
+        # GH#54877: value read back must be the microsecond-precision string below
+        df = DataFrame({"t": [datetime(2020, 12, 31, 12)]}, dtype="datetime64[ns]")
+        df.to_sql("test", self.conn, if_exists="replace", index=False)
+        result = pd.read_sql("select * from test", self.conn).iloc[0, 0]
+        assert result == "2020-12-31 12:00:00.000000"
+
 
 @pytest.mark.db
 class TestMySQLAlchemy(_TestSQLAlchemy):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 7459aa1df8f3e..cd504616b6c5d 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -798,7 +798,7 @@ def test_missing_value_generator(self):
         expected_values.insert(0, ".")
         for t in types:
             offset = valid_range[t][1]
-            for i in range(0, 27):
+            for i in range(27):
                 val = StataMissingValue(offset + 1 + i)
                 assert val.string == expected_values[i]
 
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
index 66ecb93385a87..a955fa0b096f0 100644
--- a/pandas/tests/resample/test_datetime_index.py
+++ b/pandas/tests/resample/test_datetime_index.py
@@ -1077,8 +1077,12 @@ def test_resample_segfault(unit):
         all_wins_and_wagers, columns=("ID", "timestamp", "A", "B")
     ).set_index("timestamp")
     df.index = df.index.as_unit(unit)
-    result = df.groupby("ID").resample("5min").sum()
-    expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("ID").resample("5min").sum()
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
     tm.assert_frame_equal(result, expected)
 
 
@@ -1097,7 +1101,9 @@ def test_resample_dtype_preservation(unit):
     result = df.resample("1D").ffill()
     assert result.val.dtype == np.int32
 
-    result = df.groupby("group").resample("1D").ffill()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("group").resample("1D").ffill()
     assert result.val.dtype == np.int32
 
 
@@ -1823,8 +1829,12 @@ def f(data, add_arg):
 
     # Testing dataframe
     df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10))
-    result = df.groupby("A").resample("D").agg(f, multiplier).astype(float)
-    expected = df.groupby("A").resample("D").mean().multiply(multiplier)
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").resample("D").agg(f, multiplier).astype(float)
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby("A").resample("D").mean().multiply(multiplier)
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 1b20a7b99d1d7..f331851596317 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -77,7 +77,9 @@ def test_groupby_resample_api():
     )
     index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"])
     expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)
-    result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 6f4f1154907dc..d47a8132f26bb 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -68,8 +68,12 @@ def test_deferred_with_groupby():
     def f_0(x):
         return x.set_index("date").resample("D").asfreq()
 
-    expected = df.groupby("id").apply(f_0)
-    result = df.set_index("date").groupby("id").resample("D").asfreq()
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby("id").apply(f_0)
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.set_index("date").groupby("id").resample("D").asfreq()
     tm.assert_frame_equal(result, expected)
 
     df = DataFrame(
@@ -83,8 +87,12 @@ def f_0(x):
     def f_1(x):
         return x.resample("1D").ffill()
 
-    expected = df.groupby("group").apply(f_1)
-    result = df.groupby("group").resample("1D").ffill()
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = df.groupby("group").apply(f_1)
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("group").resample("1D").ffill()
     tm.assert_frame_equal(result, expected)
 
 
@@ -99,7 +107,9 @@ def test_getitem(test_frame):
     result = g.B.resample("2s").mean()
     tm.assert_series_equal(result, expected)
 
-    result = g.resample("2s").mean().B
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = g.resample("2s").mean().B
     tm.assert_series_equal(result, expected)
 
 
@@ -230,8 +240,12 @@ def test_methods(f, test_frame):
     g = test_frame.groupby("A")
     r = g.resample("2s")
 
-    result = getattr(r, f)()
-    expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = getattr(r, f)()
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
     tm.assert_equal(result, expected)
 
 
@@ -248,8 +262,12 @@ def test_methods_nunique(test_frame):
 def test_methods_std_var(f, test_frame):
     g = test_frame.groupby("A")
     r = g.resample("2s")
-    result = getattr(r, f)(ddof=1)
-    expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = getattr(r, f)(ddof=1)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
     tm.assert_frame_equal(result, expected)
 
 
@@ -258,18 +276,24 @@ def test_apply(test_frame):
     r = g.resample("2s")
 
     # reduction
-    expected = g.resample("2s").sum()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = g.resample("2s").sum()
 
     def f_0(x):
         return x.resample("2s").sum()
 
-    result = r.apply(f_0)
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = r.apply(f_0)
     tm.assert_frame_equal(result, expected)
 
     def f_1(x):
         return x.resample("2s").apply(lambda y: y.sum())
 
-    result = g.apply(f_1)
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = g.apply(f_1)
     # y.sum() results in int64 instead of int32 on 32-bit architectures
     expected = expected.astype("int64")
     tm.assert_frame_equal(result, expected)
@@ -337,7 +361,9 @@ def test_resample_groupby_with_label():
     # GH 13235
     index = date_range("2000-01-01", freq="2D", periods=5)
     df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
-    result = df.groupby("col0").resample("1W", label="left").sum()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("col0").resample("1W", label="left").sum()
 
     mi = [
         np.array([0, 0, 1, 2], dtype=np.int64),
@@ -357,7 +383,9 @@ def test_consistency_with_window(test_frame):
     # consistent return values with window
     df = test_frame
     expected = Index([1, 2, 3], name="A")
-    result = df.groupby("A").resample("2s").mean()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").resample("2s").mean()
     assert result.index.nlevels == 2
     tm.assert_index_equal(result.index.levels[0], expected)
 
@@ -455,7 +483,9 @@ def test_resample_groupby_agg_listlike():
 def test_empty(keys):
     # GH 26411
     df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
-    result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
     expected = (
         DataFrame(columns=["a", "b"])
         .set_index(keys, drop=False)
@@ -478,7 +508,8 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
     if consolidate:
         df = df._consolidate()
 
-    result = df.groupby(["key"]).resample("W", on="date").min()
+    with tm.assert_produces_warning(FutureWarning):
+        result = df.groupby(["key"]).resample("W", on="date").min()
     idx = pd.MultiIndex.from_arrays(
         [
             ["A"] * 3 + ["B"] * 3,
@@ -530,7 +561,9 @@ def test_resample_no_index(keys):
     df = DataFrame([], columns=["a", "b", "date"])
     df["date"] = pd.to_datetime(df["date"])
     df = df.set_index("date")
-    result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
     expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
     expected["date"] = pd.to_datetime(expected["date"])
     expected = expected.set_index("date", append=True, drop=True)
@@ -577,7 +610,9 @@ def test_groupby_resample_size_all_index_same():
         {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
         index=date_range("31/12/2000 18:00", freq="H", periods=12),
     )
-    result = df.groupby("A").resample("D").size()
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby("A").resample("D").size()
     expected = Series(
         3,
         index=pd.MultiIndex.from_tuples(
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py
index d7fdbc4fe5f08..8b1eab552c97d 100644
--- a/pandas/tests/resample/test_time_grouper.py
+++ b/pandas/tests/resample/test_time_grouper.py
@@ -323,12 +323,14 @@ def test_groupby_resample_interpolate():
 
     df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")
 
-    result = (
-        df.set_index("week_starting")
-        .groupby("volume")
-        .resample("1D")
-        .interpolate(method="linear")
-    )
+    msg = "DataFrameGroupBy.resample operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = (
+            df.set_index("week_starting")
+            .groupby("volume")
+            .resample("1D")
+            .interpolate(method="linear")
+        )
 
     expected_ind = pd.MultiIndex.from_tuples(
         [
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index 3efcd930af581..5dde863f246d1 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -858,3 +858,12 @@ def test_concat_multiindex_with_category():
     )
     expected = expected.set_index(["c1", "c2"])
     tm.assert_frame_equal(result, expected)
+
+
+def test_concat_ea_upcast():
+    # GH#54848: "string" + "Int64" frames concat to a default-inferred frame
+    df1 = DataFrame(["a"], dtype="string")
+    df2 = DataFrame([1], dtype="Int64")
+    result = concat([df1, df2])
+    expected = DataFrame(["a", 1], index=[0, 0])
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
index 2f50a19189987..12d28c388d508 100644
--- a/pandas/tests/reshape/concat/test_datetimes.py
+++ b/pandas/tests/reshape/concat/test_datetimes.py
@@ -77,23 +77,23 @@ def test_concat_datetime_timezone(self):
 
         exp_idx = DatetimeIndex(
             [
-                "2010-12-31 15:00:00+00:00",
-                "2010-12-31 16:00:00+00:00",
-                "2010-12-31 17:00:00+00:00",
                 "2010-12-31 23:00:00+00:00",
                 "2011-01-01 00:00:00+00:00",
                 "2011-01-01 01:00:00+00:00",
+                "2010-12-31 15:00:00+00:00",
+                "2010-12-31 16:00:00+00:00",
+                "2010-12-31 17:00:00+00:00",
             ]
         )
 
         expected = DataFrame(
             [
-                [np.nan, 1],
-                [np.nan, 2],
-                [np.nan, 3],
                 [1, np.nan],
                 [2, np.nan],
                 [3, np.nan],
+                [np.nan, 1],
+                [np.nan, 2],
+                [np.nan, 3],
             ],
             index=exp_idx,
             columns=["a", "b"],
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 02d7e2059e8e1..d889ae2e4806b 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -26,7 +26,6 @@
     TimedeltaIndex,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
 from pandas.core.reshape.concat import concat
 from pandas.core.reshape.merge import (
     MergeError,
@@ -582,11 +581,11 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2):
         df_empty = df[:0]
         expected = DataFrame(
             {
-                "value_x": Series(dtype=df.dtypes["value"]),
                 "key": Series(dtype=df.dtypes["key"]),
+                "value_x": Series(dtype=df.dtypes["value"]),
                 "value_y": Series(dtype=df.dtypes["value"]),
             },
-            columns=["value_x", "key", "value_y"],
+            columns=["key", "value_x", "value_y"],
         )
         actual = df_empty.merge(df, on="key")
         tm.assert_frame_equal(actual, expected)
@@ -889,13 +888,13 @@ def test_merge_on_datetime64tz_empty(self):
         result = left.merge(right, on="date")
         expected = DataFrame(
             {
+                "date": Series(dtype=dtz),
                 "value_x": Series(dtype=float),
                 "date2_x": Series(dtype=dtz),
-                "date": Series(dtype=dtz),
                 "value_y": Series(dtype=float),
                 "date2_y": Series(dtype=dtz),
             },
-            columns=["value_x", "date2_x", "date", "value_y", "date2_y"],
+            columns=["date", "value_x", "date2_x", "value_y", "date2_y"],
         )
         tm.assert_frame_equal(result, expected)
 
@@ -1827,11 +1826,9 @@ def test_merge_empty(self, left_empty, how, exp):
         if exp == "left":
             expected = DataFrame({"A": [2, 1], "B": [3, 4], "C": [np.nan, np.nan]})
         elif exp == "right":
-            expected = DataFrame({"B": [np.nan], "A": [1], "C": [5]})
+            expected = DataFrame({"A": [1], "B": [np.nan], "C": [5]})
         elif exp == "empty":
             expected = DataFrame(columns=["A", "B", "C"], dtype="int64")
-            if left_empty:
-                expected = expected[["B", "A", "C"]]
         elif exp == "empty_cross":
             expected = DataFrame(columns=["A_x", "B", "A_y", "C"], dtype="int64")
 
@@ -1844,7 +1841,7 @@ def left():
         {
             "X": Series(
                 np.random.default_rng(2).choice(["foo", "bar"], size=(10,))
-            ).astype(CDT(["foo", "bar"])),
+            ).astype(CategoricalDtype(["foo", "bar"])),
             "Y": np.random.default_rng(2).choice(["one", "two", "three"], size=(10,)),
         }
     )
@@ -1853,7 +1850,10 @@ def left():
 @pytest.fixture
 def right():
     return DataFrame(
-        {"X": Series(["foo", "bar"]).astype(CDT(["foo", "bar"])), "Z": [1, 2]}
+        {
+            "X": Series(["foo", "bar"]).astype(CategoricalDtype(["foo", "bar"])),
+            "Z": [1, 2],
+        }
     )
 
 
@@ -2004,8 +2004,8 @@ def test_other_columns(self, left, right):
         "change",
         [
             lambda x: x,
-            lambda x: x.astype(CDT(["foo", "bar", "bah"])),
-            lambda x: x.astype(CDT(ordered=True)),
+            lambda x: x.astype(CategoricalDtype(["foo", "bar", "bah"])),
+            lambda x: x.astype(CategoricalDtype(ordered=True)),
         ],
     )
     def test_dtype_on_merged_different(self, change, join_type, left, right):
@@ -2112,11 +2112,13 @@ def test_merging_with_bool_or_int_cateorical_column(
         # GH 17187
         # merging with a boolean/int categorical column
         df1 = DataFrame({"id": [1, 2, 3, 4], "cat": category_column})
-        df1["cat"] = df1["cat"].astype(CDT(categories, ordered=ordered))
+        df1["cat"] = df1["cat"].astype(CategoricalDtype(categories, ordered=ordered))
         df2 = DataFrame({"id": [2, 4], "num": [1, 9]})
         result = df1.merge(df2)
         expected = DataFrame({"id": [2, 4], "cat": expected_categories, "num": [1, 9]})
-        expected["cat"] = expected["cat"].astype(CDT(categories, ordered=ordered))
+        expected["cat"] = expected["cat"].astype(
+            CategoricalDtype(categories, ordered=ordered)
+        )
         tm.assert_frame_equal(expected, result)
 
     def test_merge_on_int_array(self):
@@ -2481,14 +2483,12 @@ def test_merge_multiindex_columns():
     result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf)))
 
     # Constructing the expected results
-    expected_labels = [letter + l_suf for letter in letters] + [
-        letter + r_suf for letter in letters
-    ]
-    expected_index = MultiIndex.from_product(
-        [expected_labels, numbers], names=["outer", "inner"]
-    )
+    tuples = [(letter + l_suf, num) for letter in letters for num in numbers]
+    tuples += [("id", "")]
+    tuples += [(letter + r_suf, num) for letter in letters for num in numbers]
+
+    expected_index = MultiIndex.from_tuples(tuples, names=["outer", "inner"])
     expected = DataFrame(columns=expected_index)
-    expected["id"] = ""
 
     tm.assert_frame_equal(result, expected)
 
@@ -2947,3 +2947,38 @@ def test_merge_ea_int_and_float_numpy():
 
     result = df2.merge(df1)
     tm.assert_frame_equal(result, expected.astype("float64"))
+
+
+def test_merge_arrow_string_index(any_string_dtype):
+    # GH#54894: left-merge a string column against a string-dtype index
+    pytest.importorskip("pyarrow")
+    left = DataFrame({"a": ["a", "b"]}, dtype=any_string_dtype)
+    right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype=any_string_dtype))
+    result = left.merge(right, left_on="a", right_index=True, how="left")
+    expected = DataFrame(
+        {"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("left_empty", [True, False])
+@pytest.mark.parametrize("right_empty", [True, False])
+def test_merge_empty_frames_column_order(left_empty, right_empty):
+    # GH 51929: outer merge keeps A, B, C, D order even if an input is empty
+    df1 = DataFrame(1, index=[0], columns=["A", "B"])
+    df2 = DataFrame(1, index=[0], columns=["A", "C", "D"])
+
+    if left_empty:
+        df1 = df1.iloc[:0]
+    if right_empty:
+        df2 = df2.iloc[:0]
+
+    result = merge(df1, df2, on=["A"], how="outer")
+    expected = DataFrame(1, index=[0], columns=["A", "B", "C", "D"])
+    if left_empty and right_empty:
+        expected = expected.iloc[:0]
+    elif left_empty:
+        expected.loc[:, "B"] = np.nan
+    elif right_empty:
+        expected.loc[:, ["C", "D"]] = np.nan
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py
index b2a6ac49fdff2..3a284f7732ac1 100644
--- a/pandas/tests/reshape/test_cut.py
+++ b/pandas/tests/reshape/test_cut.py
@@ -21,7 +21,7 @@
     to_datetime,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
+from pandas.api.types import CategoricalDtype
 import pandas.core.reshape.tile as tmod
 
 
@@ -359,7 +359,7 @@ def test_cut_return_intervals():
         IntervalIndex.from_breaks(exp_bins, closed="right").take(
             [0, 0, 0, 1, 1, 1, 2, 2, 2]
         )
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
     tm.assert_series_equal(result, expected)
 
 
@@ -370,7 +370,7 @@ def test_series_ret_bins():
 
     expected = Series(
         IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right").repeat(2)
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
     tm.assert_series_equal(result, expected)
 
 
@@ -445,7 +445,7 @@ def test_datetime_bin(conv):
                 Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])),
             ]
         )
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
 
     bins = [conv(v) for v in bin_data]
     result = Series(cut(data, bins=bins))
@@ -491,7 +491,7 @@ def test_datetime_cut(data):
                 ),
             ]
         )
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
     tm.assert_series_equal(Series(result), expected)
 
 
@@ -534,7 +534,7 @@ def test_datetime_tz_cut(bins, box):
                 ),
             ]
         )
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
     tm.assert_series_equal(result, expected)
 
 
@@ -700,7 +700,7 @@ def test_cut_with_duplicated_index_lowest_included():
 def test_cut_with_nonexact_categorical_indices():
     # GH 42424
 
-    ser = Series(range(0, 100))
+    ser = Series(range(100))
     ser1 = cut(ser, 10).value_counts().head(5)
     ser2 = cut(ser, 10).value_counts().tail(5)
     result = DataFrame({"1": ser1, "2": ser2})
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 46da18445e135..28ad133a0c8d6 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -23,7 +23,7 @@
     date_range,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
+from pandas.api.types import CategoricalDtype
 from pandas.core.reshape import reshape as reshape_lib
 from pandas.core.reshape.pivot import pivot_table
 
@@ -33,7 +33,7 @@ def dropna(request):
     return request.param
 
 
-@pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))])
+@pytest.fixture(params=[([0] * 4, [1] * 4), (range(3), range(1, 4))])
 def interval_values(request, closed):
     left, right = request.param
     return Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
@@ -215,14 +215,16 @@ def test_pivot_table_dropna_categoricals(self, dropna):
             {
                 "A": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                 "B": [1, 2, 3, 1, 2, 3, 1, 2, 3],
-                "C": range(0, 9),
+                "C": range(9),
             }
         )
 
-        df["A"] = df["A"].astype(CDT(categories, ordered=False))
+        df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
         result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
         expected_columns = Series(["a", "b", "c"], name="A")
-        expected_columns = expected_columns.astype(CDT(categories, ordered=False))
+        expected_columns = expected_columns.astype(
+            CategoricalDtype(categories, ordered=False)
+        )
         expected_index = Series([1, 2, 3], name="B")
         expected = DataFrame(
             [[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]],
diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py
index 907eeca6e9b5e..bcfbe5ed1aa20 100644
--- a/pandas/tests/reshape/test_qcut.py
+++ b/pandas/tests/reshape/test_qcut.py
@@ -20,7 +20,7 @@
     timedelta_range,
 )
 import pandas._testing as tm
-from pandas.api.types import CategoricalDtype as CDT
+from pandas.api.types import CategoricalDtype
 
 from pandas.tseries.offsets import (
     Day,
@@ -129,7 +129,9 @@ def test_qcut_return_intervals():
     exp_levels = np.array(
         [Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)]
     )
-    exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True))
+    exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(
+        CategoricalDtype(ordered=True)
+    )
     tm.assert_series_equal(res, exp)
 
 
@@ -199,7 +201,7 @@ def test_single_quantile(data, start, end, length, labels):
 
     if labels is None:
         intervals = IntervalIndex([Interval(start, end)] * length, closed="right")
-        expected = Series(intervals).astype(CDT(ordered=True))
+        expected = Series(intervals).astype(CategoricalDtype(ordered=True))
     else:
         expected = Series([0] * length, dtype=np.intp)
 
@@ -249,7 +251,7 @@ def test_datetime_tz_qcut(bins):
                 ),
             ]
         )
-    ).astype(CDT(ordered=True))
+    ).astype(CategoricalDtype(ordered=True))
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
index f1d8acf47b29a..cb797a4168088 100644
--- a/pandas/tests/scalar/timedelta/test_timedelta.py
+++ b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -927,7 +927,6 @@ def test_timedelta_hash_equality(self):
 
     @pytest.mark.xfail(
         reason="pd.Timedelta violates the Python hash invariant (GH#44504).",
-        raises=AssertionError,
     )
     @given(
         st.integers(
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index a388f0f80fa94..dbaba146e600e 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -486,3 +486,12 @@ def test_getitem_str_second_with_datetimeindex():
     msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
     with pytest.raises(KeyError, match=msg):
         df[df.index[2]]
+
+
+def test_compare_datetime_with_all_none():
+    # GH#54870: ordering comparison of datetime64 data against an all-None Series
+    ser = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
+    ser2 = Series([None, None])
+    result = ser > ser2
+    expected = Series([False, False])  # every position is expected to be False
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py
index 324ab1204e16e..10b2e98586365 100644
--- a/pandas/tests/series/methods/test_drop_duplicates.py
+++ b/pandas/tests/series/methods/test_drop_duplicates.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 from pandas import (
     Categorical,
     Series,
@@ -256,3 +257,11 @@ def test_duplicated_arrow_dtype(self):
         result = ser.drop_duplicates()
         expected = Series([True, False, None], dtype="bool[pyarrow]")
         tm.assert_series_equal(result, expected)
+
+    def test_drop_duplicates_arrow_strings(self):
+        # GH#54904: duplicated pyarrow-backed strings must collapse to one row
+        pa = pytest.importorskip("pyarrow")
+        ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string()))
+        result = ser.drop_duplicates()
+        expected = Series(["a"], dtype=pd.ArrowDtype(pa.string()))
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
index 619690f400d98..549f429f09d35 100644
--- a/pandas/tests/series/methods/test_interpolate.py
+++ b/pandas/tests/series/methods/test_interpolate.py
@@ -858,3 +858,11 @@ def test_interpolate_asfreq_raises(self):
         with pytest.raises(ValueError, match=msg):
             with tm.assert_produces_warning(FutureWarning, match=msg2):
                 ser.interpolate(method="asfreq")
+
+    def test_interpolate_fill_value(self):
+        # GH#54920: method="nearest" combined with an explicit fill_value
+        pytest.importorskip("scipy")
+        ser = Series([np.nan, 0, 1, np.nan, 3, np.nan])
+        result = ser.interpolate(method="nearest", fill_value=0)
+        expected = Series([np.nan, 0, 1, 1, 3, 0])  # leading NaN stays; trailing -> 0
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py
index 4dabf7b87e2cd..6740b8756853e 100644
--- a/pandas/tests/series/methods/test_pct_change.py
+++ b/pandas/tests/series/methods/test_pct_change.py
@@ -107,3 +107,11 @@ def test_pct_change_with_duplicated_indices(fill_method):
 
     expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
     tm.assert_series_equal(result, expected)
+
+
+def test_pct_change_no_warning_na_beginning():
+    # GH#54981: pct_change on a Series whose first two entries are missing
+    ser = Series([None, None, 1, 2, 3])
+    result = ser.pct_change()
+    expected = Series([np.nan, np.nan, np.nan, 1, 0.5])
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
index bce7d2d554004..016208f2d2026 100644
--- a/pandas/tests/series/methods/test_reindex.py
+++ b/pandas/tests/series/methods/test_reindex.py
@@ -159,9 +159,9 @@ def test_reindex_inference():
 
 def test_reindex_downcasting():
     # GH4618 shifted series downcasting
-    s = Series(False, index=range(0, 5))
+    s = Series(False, index=range(5))
     result = s.shift(1).bfill()
-    expected = Series(False, index=range(0, 5))
+    expected = Series(False, index=range(5))
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 856c31b9ccb06..661290fb00d13 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -17,7 +17,7 @@
     is_integer_dtype,
     is_object_dtype,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype as CDT
+from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
 from pandas import (
@@ -1182,7 +1182,7 @@ def test_value_counts(self):
         with tm.assert_produces_warning(FutureWarning, match=msg):
             result = algos.value_counts(factor)
         breaks = [-1.606, -1.018, -0.431, 0.155, 0.741]
-        index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
+        index = IntervalIndex.from_breaks(breaks).astype(CategoricalDtype(ordered=True))
         expected = Series([1, 0, 2, 1], index=index, name="count")
         tm.assert_series_equal(result.sort_index(), expected.sort_index())
 
@@ -1412,6 +1412,19 @@ def test_value_counts_uint64(self):
 
         tm.assert_series_equal(result, expected)
 
+    def test_value_counts_series(self):
+        # GH#54857: Series.value_counts(bins=3) on values that include a NaN
+        values = np.array([3, 1, 2, 3, 4, np.nan])
+        result = Series(values).value_counts(bins=3)
+        expected = Series(
+            [2, 2, 1],
+            index=IntervalIndex.from_tuples(
+                [(0.996, 2.0), (2.0, 3.0), (3.0, 4.0)], dtype="interval[float64, right]"
+            ),
+            name="count",
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestDuplicated:
     def test_duplicated_with_nas(self):
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index ab00e18fc4812..b8e0173ee131f 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -99,7 +99,9 @@ def test_rolling(self, f, roll_frame):
         r = g.rolling(window=4)
 
         result = getattr(r, f)()
-        expected = g.apply(lambda x: getattr(x.rolling(4), f)())
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: getattr(x.rolling(4), f)())
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -113,7 +115,9 @@ def test_rolling_ddof(self, f, roll_frame):
         r = g.rolling(window=4)
 
         result = getattr(r, f)(ddof=1)
-        expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -129,9 +133,11 @@ def test_rolling_quantile(self, interpolation, roll_frame):
         r = g.rolling(window=4)
 
         result = r.quantile(0.4, interpolation=interpolation)
-        expected = g.apply(
-            lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
-        )
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(
+                lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
+            )
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -174,7 +180,9 @@ def test_rolling_corr_cov_other_diff_size_as_groups(self, f, roll_frame):
         def func(x):
             return getattr(x.rolling(4), f)(roll_frame)
 
-        expected = g.apply(func)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(func)
         # GH 39591: The grouped column should be all np.nan
         # (groupby.apply inserts 0s for cov)
         expected["A"] = np.nan
@@ -190,7 +198,9 @@ def test_rolling_corr_cov_pairwise(self, f, roll_frame):
         def func(x):
             return getattr(x.B.rolling(4), f)(pairwise=True)
 
-        expected = g.apply(func)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(func)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -235,7 +245,9 @@ def test_rolling_apply(self, raw, roll_frame):
 
         # reduction
         result = r.apply(lambda x: x.sum(), raw=raw)
-        expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -466,20 +478,23 @@ def test_groupby_rolling_subset_with_closed(self):
         # GH 35549
         df = DataFrame(
             {
-                "column1": range(6),
-                "column2": range(6),
-                "group": 3 * ["A", "B"],
-                "date": [Timestamp("2019-01-01")] * 6,
+                "column1": range(8),
+                "column2": range(8),
+                "group": ["A"] * 4 + ["B"] * 4,
+                "date": [
+                    Timestamp(date)
+                    for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
+                ]
+                * 2,
             }
         )
         result = (
             df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum()
         )
         expected = Series(
-            [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
-            index=MultiIndex.from_tuples(
-                [("A", Timestamp("2019-01-01"))] * 3
-                + [("B", Timestamp("2019-01-01"))] * 3,
+            [np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
+            index=MultiIndex.from_frame(
+                df[["group", "date"]],
                 names=["group", "date"],
             ),
             name="column1",
@@ -490,10 +505,14 @@ def test_groupby_subset_rolling_subset_with_closed(self):
         # GH 35549
         df = DataFrame(
             {
-                "column1": range(6),
-                "column2": range(6),
-                "group": 3 * ["A", "B"],
-                "date": [Timestamp("2019-01-01")] * 6,
+                "column1": range(8),
+                "column2": range(8),
+                "group": ["A"] * 4 + ["B"] * 4,
+                "date": [
+                    Timestamp(date)
+                    for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
+                ]
+                * 2,
             }
         )
 
@@ -503,10 +522,9 @@ def test_groupby_subset_rolling_subset_with_closed(self):
             .sum()
         )
         expected = Series(
-            [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
-            index=MultiIndex.from_tuples(
-                [("A", Timestamp("2019-01-01"))] * 3
-                + [("B", Timestamp("2019-01-01"))] * 3,
+            [np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
+            index=MultiIndex.from_frame(
+                df[["group", "date"]],
                 names=["group", "date"],
             ),
             name="column1",
@@ -778,9 +796,13 @@ def test_groupby_rolling_resulting_multiindex3(self):
     def test_groupby_rolling_object_doesnt_affect_groupby_apply(self, roll_frame):
         # GH 39732
         g = roll_frame.groupby("A", group_keys=False)
-        expected = g.apply(lambda x: x.rolling(4).sum()).index
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: x.rolling(4).sum()).index
         _ = g.rolling(window=4)
-        result = g.apply(lambda x: x.rolling(4).sum()).index
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = g.apply(lambda x: x.rolling(4).sum()).index
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -954,11 +976,13 @@ def test_groupby_monotonic(self):
         df["date"] = to_datetime(df["date"])
         df = df.sort_values("date")
 
-        expected = (
-            df.set_index("date")
-            .groupby("name")
-            .apply(lambda x: x.rolling("180D")["amount"].sum())
-        )
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = (
+                df.set_index("date")
+                .groupby("name")
+                .apply(lambda x: x.rolling("180D")["amount"].sum())
+            )
         result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
         tm.assert_series_equal(result, expected)
 
@@ -977,9 +1001,13 @@ def test_datelike_on_monotonic_within_each_group(self):
             }
         )
 
-        expected = (
-            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
-        )
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = (
+                df.set_index("B")
+                .groupby("A")
+                .apply(lambda x: x.rolling("4s")["C"].mean())
+            )
         result = df.groupby("A").rolling("4s", on="B").C.mean()
         tm.assert_series_equal(result, expected)
 
@@ -1009,7 +1037,9 @@ def test_expanding(self, f, frame):
         r = g.expanding()
 
         result = getattr(r, f)()
-        expected = g.apply(lambda x: getattr(x.expanding(), f)())
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: getattr(x.expanding(), f)())
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -1023,7 +1053,9 @@ def test_expanding_ddof(self, f, frame):
         r = g.expanding()
 
         result = getattr(r, f)(ddof=0)
-        expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -1039,9 +1071,11 @@ def test_expanding_quantile(self, interpolation, frame):
         r = g.expanding()
 
         result = r.quantile(0.4, interpolation=interpolation)
-        expected = g.apply(
-            lambda x: x.expanding().quantile(0.4, interpolation=interpolation)
-        )
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(
+                lambda x: x.expanding().quantile(0.4, interpolation=interpolation)
+            )
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
@@ -1059,7 +1093,9 @@ def test_expanding_corr_cov(self, f, frame):
         def func_0(x):
             return getattr(x.expanding(), f)(frame)
 
-        expected = g.apply(func_0)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(func_0)
         # GH 39591: groupby.apply returns 1 instead of nan for windows
         # with all nan values
         null_idx = list(range(20, 61)) + list(range(72, 113))
@@ -1074,7 +1110,9 @@ def func_0(x):
         def func_1(x):
             return getattr(x.B.expanding(), f)(pairwise=True)
 
-        expected = g.apply(func_1)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(func_1)
         tm.assert_series_equal(result, expected)
 
     def test_expanding_apply(self, raw, frame):
@@ -1083,7 +1121,11 @@ def test_expanding_apply(self, raw, frame):
 
         # reduction
         result = r.apply(lambda x: x.sum(), raw=raw)
-        expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = g.apply(
+                lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)
+            )
         # groupby.apply doesn't drop the grouped-by column
         expected = expected.drop("A", axis=1)
         # GH 39732
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index f4d903dc19fb7..3fe922539780d 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -100,9 +100,9 @@ def test_freq_window_not_implemented(window):
         index=date_range("2015-12-24", periods=10, freq="D"),
     )
     with pytest.raises(
-        NotImplementedError, match="step is not supported with frequency windows"
+        NotImplementedError, match="^step (not implemented|is not supported)"
     ):
-        df.rolling("3D", step=3)
+        df.rolling(window, step=3).sum()
 
 
 @pytest.mark.parametrize("agg", ["cov", "corr"])
@@ -304,6 +304,76 @@ def test_datetimelike_nonunique_index_centering(
     tm.assert_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "closed,expected",
+    [
+        ("left", [np.nan, np.nan, 1, 1, 1, 10, 14, 14, 18, 21]),
+        ("neither", [np.nan, np.nan, 1, 1, 1, 9, 5, 5, 13, 8]),
+        ("right", [0, 1, 3, 6, 10, 14, 11, 18, 21, 17]),
+        ("both", [0, 1, 3, 6, 10, 15, 20, 27, 26, 30]),
+    ],
+)
+def test_variable_window_nonunique(closed, expected, frame_or_series):
+    # GH 20712: "2D" rolling sum over repeated timestamps for each closed option
+    index = DatetimeIndex(
+        [
+            "2011-01-01",
+            "2011-01-01",
+            "2011-01-02",
+            "2011-01-02",
+            "2011-01-02",
+            "2011-01-03",
+            "2011-01-04",
+            "2011-01-04",
+            "2011-01-05",
+            "2011-01-06",
+        ]
+    )
+
+    df = frame_or_series(range(10), index=index, dtype=float)
+    expected = frame_or_series(expected, index=index, dtype=float)
+
+    result = df.rolling("2D", closed=closed).sum()
+
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "closed,expected",
+    [
+        ("left", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 18, 21]),
+        ("neither", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 13, 8]),
+        ("right", [0, 1, 3, 6, 10, 15, 21, 28, 21, 17]),
+        ("both", [0, 1, 3, 6, 10, 15, 21, 28, 26, 30]),
+    ],
+)
+def test_variable_offset_window_nonunique(closed, expected, frame_or_series):
+    # GH 20712: repeated timestamps with a BusinessDay(2) VariableOffsetWindowIndexer
+    index = DatetimeIndex(
+        [
+            "2011-01-01",
+            "2011-01-01",
+            "2011-01-02",
+            "2011-01-02",
+            "2011-01-02",
+            "2011-01-03",
+            "2011-01-04",
+            "2011-01-04",
+            "2011-01-05",
+            "2011-01-06",
+        ]
+    )
+
+    df = frame_or_series(range(10), index=index, dtype=float)
+    expected = frame_or_series(expected, index=index, dtype=float)
+
+    offset = BusinessDay(2)
+    indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
+    result = df.rolling(indexer, closed=closed, min_periods=1).sum()
+
+    tm.assert_equal(result, expected)
+
+
 def test_even_number_window_alignment():
     # see discussion in GH 38780
     s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3))
diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py
index 940f0845befa2..51f801ab3761b 100644
--- a/pandas/tests/window/test_rolling_functions.py
+++ b/pandas/tests/window/test_rolling_functions.py
@@ -388,7 +388,7 @@ def test_rolling_max_resample(step):
     # So that we can have 3 datapoints on last day (4, 10, and 20)
     indices.append(datetime(1975, 1, 5, 1))
     indices.append(datetime(1975, 1, 5, 2))
-    series = Series(list(range(0, 5)) + [10, 20], index=indices)
+    series = Series(list(range(5)) + [10, 20], index=indices)
     # Use floats instead of ints as values
     series = series.map(lambda x: float(x))
     # Sort chronologically
@@ -425,7 +425,7 @@ def test_rolling_min_resample(step):
     # So that we can have 3 datapoints on last day (4, 10, and 20)
     indices.append(datetime(1975, 1, 5, 1))
     indices.append(datetime(1975, 1, 5, 2))
-    series = Series(list(range(0, 5)) + [10, 20], index=indices)
+    series = Series(list(range(5)) + [10, 20], index=indices)
     # Use floats instead of ints as values
     series = series.map(lambda x: float(x))
     # Sort chronologically
@@ -445,7 +445,7 @@ def test_rolling_median_resample():
     # So that we can have 3 datapoints on last day (4, 10, and 20)
     indices.append(datetime(1975, 1, 5, 1))
     indices.append(datetime(1975, 1, 5, 2))
-    series = Series(list(range(0, 5)) + [10, 20], index=indices)
+    series = Series(list(range(5)) + [10, 20], index=indices)
     # Use floats instead of ints as values
     series = series.map(lambda x: float(x))
     # Sort chronologically
diff --git a/pyproject.toml b/pyproject.toml
index 845c2a63e84f0..9e579036c128b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,8 @@ requires = [
     # we don't want to force users to compile with 1.25 though
     # (Ideally, in the future, though, oldest-supported-numpy can be dropped when our min numpy is 1.25.x)
     "oldest-supported-numpy>=2022.8.16; python_version<'3.12'",
-    "numpy>=1.22.4; python_version>='3.12'",
+    # TODO: This needs to be updated when the official numpy 1.26 comes out
+    "numpy>=1.26.0b1; python_version>='3.12'",
     "versioneer[toml]"
 ]
 
@@ -30,7 +31,9 @@ license = {file = 'LICENSE'}
 requires-python = '>=3.9'
 dependencies = [
   "numpy>=1.22.4; python_version<'3.11'",
-  "numpy>=1.23.2; python_version>='3.11'",
+  "numpy>=1.23.2; python_version=='3.11'",
+  # TODO: This needs to be updated when the official numpy 1.26 comes out
+  "numpy>=1.26.0b1; python_version>='3.12'",
   "python-dateutil>=2.8.2",
   "pytz>=2020.1",
   "tzdata>=2022.1"
@@ -66,7 +69,7 @@ computation = ['scipy>=1.8.1', 'xarray>=2022.03.0']
 fss = ['fsspec>=2022.05.0']
 aws = ['s3fs>=2022.05.0']
 gcp = ['gcsfs>=2022.05.0', 'pandas-gbq>=0.17.5']
-excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.10', 'pyxlsb>=1.0.9', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.3']
+excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.10', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.9', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.3']
 parquet = ['pyarrow>=7.0.0']
 feather = ['pyarrow>=7.0.0']
 hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
@@ -109,6 +112,7 @@ all = ['beautifulsoup4>=4.11.1',
        'pytest>=7.3.2',
        'pytest-xdist>=2.2.0',
        'pytest-asyncio>=0.17.0',
+       'python-calamine>=0.1.6',
        'pyxlsb>=1.0.9',
        'qtpy>=2.2.0',
        'scipy>=1.8.1',
diff --git a/requirements-dev.txt b/requirements-dev.txt
index be02007a36333..01e0701bc39a7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -36,6 +36,7 @@ pyarrow>=7.0.0
 pymysql>=1.0.2
 pyreadstat>=1.1.5
 tables>=3.7.0
+python-calamine>=0.1.6
 pyxlsb>=1.0.9
 s3fs>=2022.05.0
 scipy>=1.8.1
@@ -76,7 +77,6 @@ ipywidgets
 nbformat
 notebook>=6.0.3
 ipykernel
-jinja2
 markdown
 feedparser
 pyyaml
diff --git a/scripts/tests/data/deps_expected_random.yaml b/scripts/tests/data/deps_expected_random.yaml
index c70025f8f019d..1ede20f5cc0d8 100644
--- a/scripts/tests/data/deps_expected_random.yaml
+++ b/scripts/tests/data/deps_expected_random.yaml
@@ -44,6 +44,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.2
   - pytables>=3.6.1
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.8
   - s3fs>=2021.08.0
   - scipy>=1.7.1
diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml
index b43815a982139..501ec4f061f17 100644
--- a/scripts/tests/data/deps_minimum.toml
+++ b/scripts/tests/data/deps_minimum.toml
@@ -62,7 +62,7 @@ computation = ['scipy>=1.7.1', 'xarray>=0.21.0']
 fss = ['fsspec>=2021.07.0']
 aws = ['s3fs>=2021.08.0']
 gcp = ['gcsfs>=2021.07.0', 'pandas-gbq>=0.15.0']
-excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3']
+excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3']
 parquet = ['pyarrow>=7.0.0']
 feather = ['pyarrow>=7.0.0']
 hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
@@ -103,6 +103,7 @@ all = ['beautifulsoup4>=5.9.3',
        'pytest>=7.3.2',
        'pytest-xdist>=2.2.0',
        'pytest-asyncio>=0.17.0',
+       'python-calamine>=0.1.6',
        'pyxlsb>=1.0.8',
        'qtpy>=2.2.0',
        'scipy>=1.7.1',
diff --git a/scripts/tests/data/deps_unmodified_random.yaml b/scripts/tests/data/deps_unmodified_random.yaml
index 503eb3c7c7734..14bedd1025bf8 100644
--- a/scripts/tests/data/deps_unmodified_random.yaml
+++ b/scripts/tests/data/deps_unmodified_random.yaml
@@ -44,6 +44,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.2
   - pytables>=3.6.1
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.8
   - s3fs>=2021.08.0
   - scipy>=1.7.1
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
index 47534226f972f..0931dd209ee05 100755
--- a/scripts/validate_unwanted_patterns.py
+++ b/scripts/validate_unwanted_patterns.py
@@ -33,6 +33,7 @@
     "_agg_template_series",
     "_agg_template_frame",
     "_pipe_template",
+    "_apply_groupings_depr",
     "__main__",
     "_transform_template",
     "_use_inf_as_na",