Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into na-indexing-raises
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Jan 2, 2020
2 parents 37ea95e + 0913ed0 commit 816a47c
Show file tree
Hide file tree
Showing 100 changed files with 2,390 additions and 1,500 deletions.
24 changes: 24 additions & 0 deletions asv_bench/benchmarks/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,30 @@ def peakmem_to_json_wide(self, orient, frame):
df.to_json(self.fname, orient=orient)


class ToJSONISO(BaseIO):
    """Benchmark ``DataFrame.to_json`` with ISO date formatting.

    The frame under test holds two timedelta columns and two datetime
    columns on an hourly datetime index; the benchmark is parametrized
    over the JSON ``orient``.
    """

    fname = "__test__.json"
    params = [["split", "columns", "index", "values", "records"]]
    param_names = ["orient"]

    def setup(self, orient):
        """Build the frame that ``time_iso_format`` serializes."""
        n_rows = 10 ** 5
        td_values = timedelta_range(start=1, periods=n_rows, freq="s")
        ts_values = date_range(start=1, periods=n_rows, freq="s")
        # Each range is reused for two columns; column order is preserved.
        columns = {
            "td_1": td_values,
            "td_2": td_values,
            "ts_1": ts_values,
            "ts_2": ts_values,
        }
        hourly_index = date_range("20000101", periods=n_rows, freq="H")
        self.df = DataFrame(columns, index=hourly_index)

    def time_iso_format(self, orient):
        """Time serializing the frame to JSON with ``date_format="iso"``."""
        self.df.to_json(orient=orient, date_format="iso")


class ToJSONLines(BaseIO):

fname = "__test__.json"
Expand Down
28 changes: 21 additions & 7 deletions ci/azure/posix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,24 @@ jobs:
ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
CONDA_PY: "36"
PATTERN: "not slow and not network"

py36_locale_slow_old_np:
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
CONDA_PY: "36"
PATTERN: "slow"
LOCALE_OVERRIDE: "zh_CN.UTF-8"
# pandas does not use the language (zh_CN), but should support different encodings (utf8)
# we should test with encodings different than utf8, but it doesn't seem like Ubuntu supports any
LANG: "zh_CN.utf8"
LC_ALL: "zh_CN.utf8"
EXTRA_APT: "language-pack-zh-hans"

py36_locale:
ENV_FILE: ci/deps/azure-36-locale.yaml
CONDA_PY: "36"
PATTERN: "not slow and not network"
LOCALE_OVERRIDE: "it_IT.UTF-8"
LANG: "it_IT.utf8"
LC_ALL: "it_IT.utf8"
EXTRA_APT: "language-pack-it"

py36_32bit:
ENV_FILE: ci/deps/azure-36-32bit.yaml
Expand All @@ -42,7 +48,9 @@ jobs:
ENV_FILE: ci/deps/azure-37-locale.yaml
CONDA_PY: "37"
PATTERN: "not slow and not network"
LOCALE_OVERRIDE: "zh_CN.UTF-8"
LANG: "zh_CN.utf8"
LC_ALL: "zh_CN.utf8"
EXTRA_APT: "language-pack-zh-hans"

py37_np_dev:
ENV_FILE: ci/deps/azure-37-numpydev.yaml
Expand All @@ -54,10 +62,16 @@ jobs:

steps:
- script: |
if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
echo "Creating Environment"
ci/setup_env.sh
if [ "$(uname)" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
fi
displayName: 'Install extra packages'
- script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
displayName: 'Set conda path'

- script: ci/setup_env.sh
displayName: 'Setup environment and build pandas'

- script: |
Expand Down
2 changes: 1 addition & 1 deletion ci/azure/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- bash: |
source activate pandas-dev
conda list
python setup.py build_ext -q -i
python setup.py build_ext -q -i -j 4
python -m pip install --no-build-isolation -e .
displayName: 'Build'
Expand Down
8 changes: 8 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
RET=$(($RET + $?)) ; echo $MSG "DONE"

# Run scripts/validate_string_concatenation.py over the current directory.
# On GitHub Actions, pass a --format template so every finding is printed
# with an "[error]" prefix followed by source path, line number and message.
MSG='Check for use of not concatenated strings' ; echo $MSG
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
$BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
else
$BASE_DIR/scripts/validate_string_concatenation.py .
fi
# Accumulate the validator's exit status into RET.
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
isort --version-number

Expand Down
11 changes: 0 additions & 11 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,6 @@
# https://github.com/pytest-dev/pytest/issues/1075
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')

if [ -n "$LOCALE_OVERRIDE" ]; then
export LC_ALL="$LOCALE_OVERRIDE"
export LANG="$LOCALE_OVERRIDE"
PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
# TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
# exit 1
fi
fi

if [[ "not network" == *"$PATTERN"* ]]; then
export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
fi
Expand Down
6 changes: 3 additions & 3 deletions ci/setup_env.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/bin/bash -e

# edit the locale file if needed
if [ -n "$LOCALE_OVERRIDE" ]; then
if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
echo "Adding locale to the first line of pandas/__init__.py"
rm -f pandas/__init__.pyc
SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
sed -i "$SEDC" pandas/__init__.py

echo "[head -4 pandas/__init__.py]"
head -4 pandas/__init__.py
echo
sudo locale-gen "$LOCALE_OVERRIDE"
fi

MINICONDA_DIR="$HOME/miniconda3"
Expand Down
3 changes: 2 additions & 1 deletion doc/source/getting_started/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,9 @@ Plotting

See the :ref:`Plotting <visualization>` docs.

We use the standard convention for referencing the matplotlib API:

.. ipython:: python
:suppress:
import matplotlib.pyplot as plt
plt.close('all')
Expand Down
20 changes: 16 additions & 4 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Dedicated string data type
^^^^^^^^^^^^^^^^^^^^^^^^^^

We've added :class:`StringDtype`, an extension type dedicated to string data.
Previously, strings were typically stored in object-dtype NumPy arrays.
Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`)

.. warning::

Expand Down Expand Up @@ -216,13 +216,18 @@ Other enhancements
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
- DataFrame constructor preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)

- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)



Build Changes
^^^^^^^^^^^^^
Expand Down Expand Up @@ -781,6 +786,7 @@ Datetimelike
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
Expand Down Expand Up @@ -894,6 +900,7 @@ I/O
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)

Plotting
^^^^^^^^
Expand All @@ -909,12 +916,13 @@ Plotting
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).

Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
Expand Down Expand Up @@ -944,6 +952,7 @@ Reshaping
- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`)
- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`)
- Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`)
- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
- Bug in :meth:`DataFrame.replace` that caused a non-numeric replacer's dtype to not be respected (:issue:`26632`)
Expand Down Expand Up @@ -981,7 +990,10 @@ Other
- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
- Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
- Bug in :meth:`Series.count` raising if ``use_inf_as_na`` is enabled (:issue:`29478`)
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`)
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)

.. _whatsnew_1000.contributors:

Expand Down
41 changes: 39 additions & 2 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@
"the C extensions first."
)

from datetime import datetime

from pandas._config import (
get_option,
set_option,
Expand Down Expand Up @@ -210,6 +208,19 @@ class Panel:

return Panel

elif name == "datetime":
warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
"Import from datetime module instead.",
FutureWarning,
stacklevel=2,
)

from datetime import datetime as dt

return dt

elif name == "np":

warnings.warn(
Expand Down Expand Up @@ -264,13 +275,39 @@ def __getattr__(self, item):
FutureWarning,
stacklevel=2,
)

try:
return getattr(self.np, item)
except AttributeError:
raise AttributeError(f"module numpy has no attribute {item}")

np = __numpy()

class __Datetime(type):
    """
    Metaclass behind the deprecated ``pandas.datetime`` alias.

    The class created from this metaclass stands in for
    ``datetime.datetime``.  Calling it or reading an attribute from it
    emits a ``FutureWarning`` and then forwards to the real class;
    ``isinstance`` and ``issubclass`` checks are forwarded without a
    warning.

    Binding the alias directly to ``datetime.datetime`` (as an instance
    attribute lookup would do) bypasses ``__getattr__`` entirely, so the
    deprecation warning would never be shown; routing access through the
    metaclass hooks below avoids that.
    """

    # Resolved once at class creation.  Single leading underscore on purpose:
    # a double underscore would be name-mangled inside this class body.
    from datetime import datetime as _wrapped

    def _emit_warning(cls):
        """Emit the deprecation warning, attributed to the user's call site."""
        import warnings

        warnings.warn(
            "The pandas.datetime class is deprecated "
            "and will be removed from pandas in a future version. "
            "Import from datetime instead.",
            FutureWarning,
            # 1 = this helper, 2 = __call__ / __getattr__, 3 = the caller.
            stacklevel=3,
        )

    def __call__(cls, *args, **kwargs):
        """Warn, then construct a real ``datetime.datetime`` instance."""
        cls._emit_warning()
        return cls._wrapped(*args, **kwargs)

    def __getattr__(cls, item):
        """
        Warn, then forward the attribute lookup to ``datetime.datetime``.

        Raises
        ------
        AttributeError
            If ``datetime.datetime`` has no attribute named ``item``.
        """
        cls._emit_warning()

        try:
            return getattr(cls._wrapped, item)
        except AttributeError as err:
            raise AttributeError(f"module datetime has no attribute {item}") from err

    def __instancecheck__(cls, instance):
        """Make ``isinstance(x, pandas.datetime)`` behave as for the real class."""
        return isinstance(instance, cls._wrapped)

    def __subclasscheck__(cls, subclass):
        """Make ``issubclass(X, pandas.datetime)`` behave as for the real class."""
        return issubclass(subclass, cls._wrapped)


# The proxy class itself is what gets exposed as ``datetime``.
datetime = __Datetime("datetime", (), {})


# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
Expand Down
Loading

0 comments on commit 816a47c

Please sign in to comment.