Merge remote-tracking branch 'upstream/master' into na-indexing-raises

pandas-dev · Jan 2, 2020 · 816a47c · 816a47c
2 parents 37ea95e + 0913ed0
commit 816a47c
Show file tree

Hide file tree

Showing 100 changed files with 2,390 additions and 1,500 deletions.
diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
@@ -132,6 +132,30 @@ def peakmem_to_json_wide(self, orient, frame):
         df.to_json(self.fname, orient=orient)
 
 
+class ToJSONISO(BaseIO):  # benchmark for DataFrame.to_json(date_format="iso") over several orients
+    fname = "__test__.json"  # NOTE(review): not referenced in this class (to_json below gets no path); presumably consumed by BaseIO - confirm
+    params = [["split", "columns", "index", "values", "records"]]  # candidate values for the single `orient` parameter
+    param_names = ["orient"]
+
+    def setup(self, orient):  # builds self.df; `orient` is accepted but not used here
+        N = 10 ** 5  # number of rows shared by the index and all four columns
+        index = date_range("20000101", periods=N, freq="H")
+        timedeltas = timedelta_range(start=1, periods=N, freq="s")
+        datetimes = date_range(start=1, periods=N, freq="s")  # NOTE(review): integer start is presumably an offset from the epoch - confirm
+        self.df = DataFrame(
+            {
+                "td_1": timedeltas,  # two timedelta columns reuse the same range object
+                "td_2": timedeltas,
+                "ts_1": datetimes,  # two datetime columns reuse the same range object
+                "ts_2": datetimes,
+            },
+            index=index,
+        )
+
+    def time_iso_format(self, orient):  # timed body: serialize self.df with the given orient
+        self.df.to_json(orient=orient, date_format="iso")  # no path argument is passed here
+
+
+
 class ToJSONLines(BaseIO):
 
     fname = "__test__.json"

diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml
@@ -19,18 +19,24 @@ jobs:
           ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
+
         py36_locale_slow_old_np:
           ENV_FILE: ci/deps/azure-36-locale_slow.yaml
           CONDA_PY: "36"
           PATTERN: "slow"
-          LOCALE_OVERRIDE: "zh_CN.UTF-8"
+          # pandas does not use the language (zh_CN), but should support different encodings (utf8)
+          # we should test with encodings other than utf8, but it doesn't seem like Ubuntu supports any
+          LANG: "zh_CN.utf8"
+          LC_ALL: "zh_CN.utf8"
           EXTRA_APT: "language-pack-zh-hans"
 
         py36_locale:
           ENV_FILE: ci/deps/azure-36-locale.yaml
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
-          LOCALE_OVERRIDE: "it_IT.UTF-8"
+          LANG: "it_IT.utf8"
+          LC_ALL: "it_IT.utf8"
+          EXTRA_APT: "language-pack-it"
 
         py36_32bit:
           ENV_FILE: ci/deps/azure-36-32bit.yaml
@@ -42,7 +48,9 @@ jobs:
           ENV_FILE: ci/deps/azure-37-locale.yaml
           CONDA_PY: "37"
           PATTERN: "not slow and not network"
-          LOCALE_OVERRIDE: "zh_CN.UTF-8"
+          LANG: "zh_CN.utf8"
+          LC_ALL: "zh_CN.utf8"
+          EXTRA_APT: "language-pack-zh-hans"
 
         py37_np_dev:
           ENV_FILE: ci/deps/azure-37-numpydev.yaml
@@ -54,10 +62,16 @@ jobs:
 
   steps:
     - script: |
-        if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
-        echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
-        echo "Creating Environment"
-        ci/setup_env.sh
+        if [ "$(uname)" == "Linux" ]; then
+          sudo apt-get update
+          sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
+        fi
+      displayName: 'Install extra packages'
+
+    - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
+      displayName: 'Set conda path'
+
+    - script: ci/setup_env.sh
       displayName: 'Setup environment and build pandas'
 
     - script: |

diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml
@@ -34,7 +34,7 @@ jobs:
     - bash: |
         source activate pandas-dev
         conda list
-        python setup.py build_ext -q -i
+        python setup.py build_ext -q -i -j 4
         python -m pip install --no-build-isolation -e .
       displayName: 'Build'
 

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of not concatenated strings' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
+    else
+        $BASE_DIR/scripts/validate_string_concatenation.py .
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     echo "isort --version-number"
     isort --version-number
 

diff --git a/ci/run_tests.sh b/ci/run_tests.sh
@@ -5,17 +5,6 @@
 # https://github.com/pytest-dev/pytest/issues/1075
 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
 
-if [ -n "$LOCALE_OVERRIDE" ]; then
-    export LC_ALL="$LOCALE_OVERRIDE"
-    export LANG="$LOCALE_OVERRIDE"
-    PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
-    if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
-        echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
-        # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
-        # exit 1
-    fi
-fi
-
 if [[ "not network" == *"$PATTERN"* ]]; then
     export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
 fi

diff --git a/ci/setup_env.sh b/ci/setup_env.sh
@@ -1,15 +1,15 @@
 #!/bin/bash -e
 
 # edit the locale file if needed
-if [ -n "$LOCALE_OVERRIDE" ]; then
+if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
     echo "Adding locale to the first line of pandas/__init__.py"
     rm -f pandas/__init__.pyc
-    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
+    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
     sed -i "$SEDC" pandas/__init__.py
+
     echo "[head -4 pandas/__init__.py]"
     head -4 pandas/__init__.py
     echo
-    sudo locale-gen "$LOCALE_OVERRIDE"
 fi
 
 MINICONDA_DIR="$HOME/miniconda3"

diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst
@@ -697,8 +697,9 @@ Plotting
 
 See the :ref:`Plotting <visualization>` docs.
 
+We use the standard convention for referencing the matplotlib API:
+
 .. ipython:: python
-   :suppress:
 
    import matplotlib.pyplot as plt
    plt.close('all')

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -56,7 +56,7 @@ Dedicated string data type
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 We've added :class:`StringDtype`, an extension type dedicated to string data.
-Previously, strings were typically stored in object-dtype NumPy arrays.
+Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`)
 
 .. warning::
 
@@ -216,13 +216,18 @@ Other enhancements
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
 - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
+- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
 - The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
 - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
 - DataFrame constructor preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
 - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
 - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
-
 - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
+- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``.  This format supports exporting strings containing Unicode characters (:issue:`23573`)
+- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
+- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)
+
+
 
 Build Changes
 ^^^^^^^^^^^^^
@@ -781,6 +786,7 @@ Datetimelike
 - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
 - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
 - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
+- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
 - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
 - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
 - Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
@@ -894,6 +900,7 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)
 
 Plotting
 ^^^^^^^^
@@ -909,12 +916,13 @@ Plotting
 - :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
 - :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
 - Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
+- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
 - Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
 - Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
@@ -944,6 +952,7 @@ Reshaping
 - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
 - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
 - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`)
+- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`)
 - Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`)
 - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
 - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
@@ -981,7 +990,10 @@ Other
 - Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
 - Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
 - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)
-- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`)
+- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
+- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
+- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
+- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
 
 .. _whatsnew_1000.contributors:
 

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -39,8 +39,6 @@
         "the C extensions first."
     )
 
-from datetime import datetime
-
 from pandas._config import (
     get_option,
     set_option,
@@ -210,6 +208,19 @@ class Panel:
 
             return Panel
 
+        elif name == "datetime":
+            warnings.warn(
+                "The pandas.datetime class is deprecated "
+                "and will be removed from pandas in a future version. "
+                "Import from datetime module instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+
+            from datetime import datetime as dt
+
+            return dt
+
         elif name == "np":
 
             warnings.warn(
@@ -264,13 +275,39 @@ def __getattr__(self, item):
                 FutureWarning,
                 stacklevel=2,
             )
+
             try:
                 return getattr(self.np, item)
             except AttributeError:
                 raise AttributeError(f"module numpy has no attribute {item}")
 
     np = __numpy()
 
+    class __Datetime:  # wrapper holding the stdlib datetime class; warns when attribute lookup falls through to __getattr__
+        def __init__(self):
+            from datetime import datetime as dt
+
+            self.datetime = dt  # stores the stdlib datetime class itself as an instance attribute
+
+        def __getattr__(self, item):  # invoked only for names not found by normal lookup on the instance
+            import warnings
+
+            warnings.warn(
+                "The pandas.datetime class is deprecated "
+                "and will be removed from pandas in a future version. "
+                "Import from datetime instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+
+            try:
+                return getattr(self.datetime, item)  # forward the lookup to the wrapped datetime class
+            except AttributeError:
+                raise AttributeError(f"module datetime has no attribute {item}")  # NOTE(review): message says "module" but self.datetime is the class
+
+    datetime = __Datetime().datetime  # NOTE(review): binds the attribute set in __init__ (the raw class), so __getattr__ and its warning are not on this name's access path - confirm intended
+
+
 # module level doc-string
 __doc__ = """
 pandas - a powerful data analysis and manipulation library for Python