Commit ec48816

Merge branch 'main' into just-change-value-counts

MarcoGorelli authored Dec 27, 2022
2 parents 2d40eac + eff6566
Showing 161 changed files with 1,772 additions and 1,685 deletions.
2 changes: 1 addition & 1 deletion .github/actions/setup-conda/action.yml
@@ -18,7 +18,7 @@ runs:
  - name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
  run: |
  grep -q ' - pyarrow' ${{ inputs.environment-file }}
- sed -i"" -e "s/ - pyarrow<10/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
+ sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
  cat ${{ inputs.environment-file }}
  shell: bash
  if: ${{ inputs.pyarrow-version }}
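(Aside, not part of the diff: the sed call rewrites the conda environment file so the now-unpinned "- pyarrow" entry gets an explicit version. A tiny Python illustration of that text rewrite, using a made-up version number:)

    import re

    # Hypothetical stand-in for the sed substitution above: pin an
    # unpinned pyarrow entry in an environment file to a requested version.
    line = "  - pyarrow"
    print(re.sub(r"- pyarrow", "- pyarrow=9", line))  # "  - pyarrow=9"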
1 change: 1 addition & 0 deletions .github/workflows/macos-windows.yml
@@ -16,6 +16,7 @@ env:
  PANDAS_CI: 1
  PYTEST_TARGET: pandas
  PATTERN: "not slow and not db and not network and not single_cpu"
+ TEST_ARGS: "-W error:::pandas"


  permissions:
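(Aside, not part of the diff: "-W error:::pandas" follows Python's action:message:category:module warning-filter syntax, so the new TEST_ARGS setting makes the test run treat warnings raised from pandas code as failures. A rough Python equivalent of that filter is sketched below.)

    import warnings

    # Approximate effect of "-W error:::pandas": escalate warnings whose
    # originating module name starts with "pandas" into errors, so the
    # test run fails instead of merely printing them.
    warnings.filterwarnings("error", module="pandas")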
7 changes: 5 additions & 2 deletions .github/workflows/ubuntu.yml
@@ -29,7 +29,7 @@ jobs:
  matrix:
  env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
  pattern: ["not single_cpu", "single_cpu"]
- pyarrow_version: ["7", "8", "9"]
+ pyarrow_version: ["7", "8", "9", "10"]
  include:
  - name: "Downstream Compat"
  env_file: actions-38-downstream_compat.yaml
@@ -38,6 +38,7 @@
  - name: "Minimum Versions"
  env_file: actions-38-minimum_versions.yaml
  pattern: "not slow and not network and not single_cpu"
+ test_args: ""
  - name: "Locale: it_IT"
  env_file: actions-38.yaml
  pattern: "not slow and not network and not single_cpu"
@@ -62,10 +63,12 @@
  env_file: actions-310.yaml
  pattern: "not slow and not network and not single_cpu"
  pandas_copy_on_write: "1"
+ test_args: ""
  - name: "Data Manager"
  env_file: actions-38.yaml
  pattern: "not slow and not network and not single_cpu"
  pandas_data_manager: "array"
+ test_args: ""
  - name: "Pypy"
  env_file: actions-pypy-38.yaml
  pattern: "not slow and not network and not single_cpu"
@@ -93,7 +96,7 @@ jobs:
  LC_ALL: ${{ matrix.lc_all || '' }}
  PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
  PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
- TEST_ARGS: ${{ matrix.test_args || '' }}
+ TEST_ARGS: ${{ matrix.test_args || '-W error:::pandas' }}
  PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
  PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
  IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
@@ -333,3 +333,13 @@ repos:
  additional_dependencies:
  - autotyping==22.9.0
  - libcst==0.4.7
+ - id: check-test-naming
+ name: check that test names start with 'test'
+ entry: python -m scripts.check_test_naming
+ types: [python]
+ files: ^pandas/tests
+ language: python
+ exclude: |
+ (?x)
+ ^pandas/tests/generic/test_generic.py # GH50380
+ |^pandas/tests/io/json/test_readlines.py # GH50378
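(Aside, not part of the diff: the hook invokes pandas' scripts.check_test_naming module, which is not shown here. Purely to illustrate the kind of check it performs, a minimal sketch follows; the helper name and the restriction to top-level functions are assumptions, not the real script.)

    import ast
    import sys

    def misnamed_tests(path: str) -> list[str]:
        # Hypothetical helper: report top-level functions in a test module
        # whose names do not start with "test".
        with open(path, encoding="utf-8") as f:
            tree = ast.parse(f.read())
        return [
            node.name
            for node in tree.body
            if isinstance(node, ast.FunctionDef) and not node.name.startswith("test")
        ]

    if __name__ == "__main__":
        failures = {path: misnamed_tests(path) for path in sys.argv[1:]}
        failures = {path: names for path, names in failures.items() if names}
        if failures:
            print(failures)
            sys.exit(1)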
2 changes: 1 addition & 1 deletion Dockerfile
@@ -8,6 +8,6 @@ RUN apt-get install -y build-essential
  RUN apt-get install -y libhdf5-dev

  RUN python -m pip install --upgrade pip
- RUN python -m pip install --use-deprecated=legacy-resolver \
+ RUN python -m pip install \
  -r https://raw.githubusercontent.com/pandas-dev/pandas/main/requirements-dev.txt
  CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
@@ -41,7 +41,7 @@
  // pip (with all the conda available packages installed first,
  // followed by the pip installed packages).
  "matrix": {
- "numpy": [],
+ "numpy": ["1.23.5"], // https://github.com/pandas-dev/pandas/pull/50356
  "Cython": ["0.29.32"],
  "matplotlib": [],
  "sqlalchemy": [],
2 changes: 1 addition & 1 deletion ci/deps/actions-310-numpydev.yaml
@@ -22,5 +22,5 @@ dependencies:
  - "cython"
  - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
  - "--pre"
- - "numpy"
+ - "numpy<1.24"
  - "scipy"
4 changes: 2 additions & 2 deletions ci/deps/actions-310.yaml
@@ -18,7 +18,7 @@ dependencies:

  # required dependencies
  - python-dateutil
- - numpy
+ - numpy<1.24
  - pytz

  # optional dependencies
@@ -42,7 +42,7 @@ dependencies:
  - psycopg2
  - pymysql
  - pytables
- - pyarrow<10
+ - pyarrow
  - pyreadstat
  - python-snappy
  - pyxlsb
4 changes: 2 additions & 2 deletions ci/deps/actions-38-downstream_compat.yaml
@@ -19,7 +19,7 @@ dependencies:

  # required dependencies
  - python-dateutil
- - numpy
+ - numpy<1.24
  - pytz

  # optional dependencies
@@ -40,7 +40,7 @@ dependencies:
  - openpyxl
  - odfpy
  - psycopg2
- - pyarrow<10
+ - pyarrow
  - pymysql
  - pyreadstat
  - pytables
4 changes: 2 additions & 2 deletions ci/deps/actions-38.yaml
@@ -18,7 +18,7 @@ dependencies:

  # required dependencies
  - python-dateutil
- - numpy
+ - numpy<1.24
  - pytz

  # optional dependencies
@@ -40,7 +40,7 @@ dependencies:
  - odfpy
  - pandas-gbq
  - psycopg2
- - pyarrow<10
+ - pyarrow
  - pymysql
  - pyreadstat
  - pytables
4 changes: 2 additions & 2 deletions ci/deps/actions-39.yaml
@@ -18,7 +18,7 @@ dependencies:

  # required dependencies
  - python-dateutil
- - numpy
+ - numpy<1.24
  - pytz

  # optional dependencies
@@ -41,7 +41,7 @@ dependencies:
  - pandas-gbq
  - psycopg2
  - pymysql
- - pyarrow<10
+ - pyarrow
  - pyreadstat
  - pytables
  - python-snappy
2 changes: 1 addition & 1 deletion ci/deps/actions-pypy-38.yaml
@@ -19,6 +19,6 @@ dependencies:
  - hypothesis>=5.5.3

  # required
- - numpy
+ - numpy<1.24
  - python-dateutil
  - pytz
4 changes: 2 additions & 2 deletions ci/deps/circle-38-arm64.yaml
@@ -18,7 +18,7 @@ dependencies:

  # required dependencies
  - python-dateutil
- - numpy
+ - numpy<1.24
  - pytz

  # optional dependencies
@@ -40,7 +40,7 @@ dependencies:
  - odfpy
  - pandas-gbq
  - psycopg2
- - pyarrow<10
+ - pyarrow
  - pymysql
  # Not provided on ARM
  #- pyreadstat
1 change: 1 addition & 0 deletions doc/source/reference/indexing.rst
@@ -298,6 +298,7 @@ MultiIndex components
  MultiIndex.swaplevel
  MultiIndex.reorder_levels
  MultiIndex.remove_unused_levels
+ MultiIndex.drop

  MultiIndex selecting
  ~~~~~~~~~~~~~~~~~~~~
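(Aside, not part of the diff: MultiIndex.drop, newly listed above, removes the requested labels from the index, either as full tuples or within a single level. A short usage sketch:)

    import pandas as pd

    mi = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
    mi.drop([("a", 1)])      # drop one complete (level-0, level-1) entry
    mi.drop(["a"], level=0)  # drop every entry whose first level is "a"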
9 changes: 5 additions & 4 deletions doc/source/user_guide/io.rst
@@ -275,6 +275,9 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
  infer_datetime_format : boolean, default ``False``
  If ``True`` and parse_dates is enabled for a column, attempt to infer the
  datetime format to speed up the processing.
+
+ .. deprecated:: 2.0.0
+ A strict version of this argument is now the default, passing it has no effect.
  keep_date_col : boolean, default ``False``
  If ``True`` and parse_dates specifies combining multiple columns then keep the
  original columns.
@@ -916,12 +919,10 @@ an exception is raised, the next one is tried:

  Note that performance-wise, you should try these methods of parsing dates in order:

- 1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
-
- 2. If you know the format, use ``pd.to_datetime()``:
+ 1. If you know the format, use ``pd.to_datetime()``:
  ``date_parser=lambda x: pd.to_datetime(x, format=...)``.

- 3. If you have a really non-standard format, use a custom ``date_parser`` function.
+ 2. If you have a really non-standard format, use a custom ``date_parser`` function.
  For optimal performance, this should be vectorized, i.e., it should accept arrays
  as arguments.
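(Aside, not part of the diff: a minimal sketch of option 1 above — passing a known format through pd.to_datetime via date_parser. The file name, column name, and format string are made-up placeholders.)

    import pandas as pd

    # Hypothetical CSV with a "date" column in a fixed, known format.
    df = pd.read_csv(
        "data.csv",
        parse_dates=["date"],
        date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d"),
    )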
(Diffs for the remaining changed files are not shown.)