
Commit

Merge branch 'master' into doc-multiindex-get_slice_bound
proost authored Dec 23, 2019
2 parents de298f5 + 3577b5a commit 4cce86d
Showing 84 changed files with 1,060 additions and 967 deletions.
3 changes: 2 additions & 1 deletion ci/azure/windows.yml
@@ -31,7 +31,8 @@ jobs:
   - bash: |
       source activate pandas-dev
       conda list
-      ci\\incremental\\build.cmd
+      python setup.py build_ext -q -i
+      python -m pip install --no-build-isolation -e .
     displayName: 'Build'
   - bash: |
       source activate pandas-dev
8 changes: 6 additions & 2 deletions ci/code_checks.sh
@@ -52,7 +52,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     black --version

     MSG='Checking black formatting' ; echo $MSG
-    black . --check 
+    black . --check
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     # `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -104,7 +104,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     isort --version-number

     # Imports - Check formatting using isort see setup.cfg for settings
-    MSG='Check import format using isort ' ; echo $MSG
+    MSG='Check import format using isort' ; echo $MSG
     ISORT_CMD="isort --recursive --check-only pandas asv_bench"
     if [[ "$GITHUB_ACTIONS" == "true" ]]; then
         eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
@@ -203,6 +203,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} '\.__class__' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"

+    MSG='Check for use of xrange instead of range' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} 'xrange' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
     invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
9 changes: 0 additions & 9 deletions ci/incremental/build.cmd

This file was deleted.

4 changes: 2 additions & 2 deletions ci/run_tests.sh
@@ -38,6 +38,6 @@ sh -c "$PYTEST_CMD"

 if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
     echo "uploading coverage"
-    echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
-    bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
+    echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME"
+    bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME
 fi
Binary file removed doc/source/_static/favicon.ico
Binary file not shown.
8 changes: 6 additions & 2 deletions doc/source/conf.py
@@ -204,7 +204,11 @@
 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
-# html_theme_options = {}
+html_theme_options = {
+    "external_links": [],
+    "github_url": "https://github.com/pandas-dev/pandas",
+    "twitter_url": "https://twitter.com/pandas_dev",
+}

 # Add any paths that contain custom themes here, relative to this directory.
 # html_theme_path = ["themes"]
@@ -228,7 +232,7 @@
 # The name of an image file (within the static path) to use as favicon of the
 # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-html_favicon = os.path.join(html_static_path[0], "favicon.ico")
+html_favicon = "../../web/pandas/static/img/favicon.ico"

 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
5 changes: 2 additions & 3 deletions doc/source/user_guide/io.rst
@@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns
 .. ipython:: python

     df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
-    df.to_parquet(fname='test', engine='pyarrow',
+    df.to_parquet(path='test', engine='pyarrow',
                   partition_cols=['a'], compression=None)

-The `fname` specifies the parent directory to which data will be saved.
+The `path` specifies the parent directory to which data will be saved.
 The `partition_cols` are the column names by which the dataset will be partitioned.
 Columns are partitioned in the order they are given. The partition splits are
 determined by the unique values in the partition columns.
@@ -4828,7 +4828,6 @@ See also some :ref:`cookbook examples <cookbook.sql>` for some advanced strategies
 The key functions are:

 .. autosummary::
-    :toctree: ../reference/api/

     read_sql_table
     read_sql_query
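For context, the renamed keyword in the partitioning snippet above behaves exactly as before; a minimal round trip, assuming pandas 1.0+ with pyarrow installed and a writable working directory:

    import pandas as pd

    df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
    # 'test' is the parent directory; one subdirectory per unique value
    # of 'a' is created (test/a=0/..., test/a=1/...).
    df.to_parquet(path='test', engine='pyarrow',
                  partition_cols=['a'], compression=None)

    # Reading the directory back reassembles the partition column.
    print(pd.read_parquet('test', engine='pyarrow'))
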
9 changes: 8 additions & 1 deletion doc/source/user_guide/text.rst
@@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from
 1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
    that return **numeric** output will always return a nullable integer dtype,
    rather than either int or float dtype, depending on the presence of NA values.
+   Methods returning **boolean** output will return a nullable boolean dtype.

 .. ipython:: python
@@ -89,7 +90,13 @@ 1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
     s.astype(object).str.count("a")
     s.astype(object).dropna().str.count("a")

-When NA values are present, the output dtype is float64.
+When NA values are present, the output dtype is float64. Similarly for
+methods returning boolean values.
+
+.. ipython:: python
+
+    s.str.isdigit()
+    s.str.match("a")

 2. Some string methods, like :meth:`Series.str.decode` are not available
    on ``StringArray`` because ``StringArray`` only holds strings, not
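A short sketch of the nullable-boolean behaviour documented above, assuming pandas 1.0+ where ``StringDtype`` is available:

    import pandas as pd

    # Series backed by the new StringDtype; the None becomes pd.NA.
    s = pd.Series(["a", None, "b1"], dtype="string")

    # Boolean-returning string accessors now yield the nullable "boolean"
    # dtype, so the missing entry stays <NA> instead of forcing object dtype.
    print(s.str.isdigit())   # dtype: boolean
    print(s.str.match("a"))  # dtype: boolean
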
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v1.0.0.rst
@@ -502,7 +502,8 @@ Deprecations
 - :func:`pandas.json_normalize` is now exposed in the top-level namespace.
   Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and
   it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
--
+- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)


 .. _whatsnew_1000.prior_deprecations:

@@ -711,7 +712,7 @@ Datetimelike
 - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
 - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`)
 - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
--
+- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)

 Timedelta
 ^^^^^^^^^
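One of the entries above deprecates the ``fname`` keyword in favour of ``path``; a sketch of what callers see during the transition, assuming pandas 1.0 with pyarrow installed:

    import warnings
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        df.to_parquet(fname="out.parquet")  # old name, remapped to path=
    print(caught[0].category)               # <class 'FutureWarning'>

    df.to_parquet(path="out.parquet")       # new name, no warning
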
21 changes: 20 additions & 1 deletion pandas/_libs/parsers.pyx
@@ -1367,7 +1367,26 @@ def _ensure_encoded(list lst):
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
-_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))
+STR_NA_VALUES = {
+    "-1.#IND",
+    "1.#QNAN",
+    "1.#IND",
+    "-1.#QNAN",
+    "#N/A N/A",
+    "#N/A",
+    "N/A",
+    "n/a",
+    "NA",
+    "#NA",
+    "NULL",
+    "null",
+    "NaN",
+    "-NaN",
+    "nan",
+    "-nan",
+    "",
+}
+_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES))


 def _maybe_upcast(arr):
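The set being inlined above is what ``read_csv`` treats as missing when ``na_values`` is not supplied; a quick sketch:

    import io
    import pandas as pd

    # "NA", "null" and the empty string are all in the default NA set,
    # so they parse to NaN with no extra configuration.
    buf = io.StringIO("x,y\nNA,1\nnull,2\n,3\nfoo,4\n")
    print(pd.read_csv(buf)["x"])  # NaN, NaN, NaN, foo

    # keep_default_na=False disables the defaults shown in the diff.
    buf = io.StringIO("x,y\nNA,1\n")
    print(pd.read_csv(buf, keep_default_na=False)["x"])  # literal "NA"
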
2 changes: 1 addition & 1 deletion pandas/_libs/testing.pyx
@@ -159,7 +159,7 @@ cpdef assert_almost_equal(a, b,

     raise_assert_detail(obj, f"{obj} length are different", na, nb, r)

-    for i in xrange(len(a)):
+    for i in range(len(a)):
         try:
             assert_almost_equal(a[i], b[i],
                                 check_less_precise=check_less_precise)
25 changes: 18 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -336,11 +336,22 @@ class Timestamp(_Timestamp):
         """
         return cls(datetime.combine(date, time))

-    def __new__(cls, object ts_input=_no_input,
-                object freq=None, tz=None, unit=None,
-                year=None, month=None, day=None,
-                hour=None, minute=None, second=None, microsecond=None,
-                nanosecond=None, tzinfo=None):
+    def __new__(
+        cls,
+        object ts_input=_no_input,
+        object freq=None,
+        tz=None,
+        unit=None,
+        year=None,
+        month=None,
+        day=None,
+        hour=None,
+        minute=None,
+        second=None,
+        microsecond=None,
+        nanosecond=None,
+        tzinfo=None
+    ):
         # The parameter list folds together legacy parameter names (the first
         # four) and positional and keyword parameter names from pydatetime.
         #
@@ -401,8 +412,8 @@ class Timestamp(_Timestamp):
             freq = None

         if getattr(ts_input, 'tzinfo', None) is not None and tz is not None:
-            raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the"
-                             " tz parameter. Use tz_convert instead.")
+            raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with "
+                             "the tz parameter. Use tz_convert instead.")

         ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0)

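The reworded error message above guards the case where a tz-aware value is combined with the ``tz`` argument; a sketch of that behaviour:

    import pandas as pd

    ts = pd.Timestamp("2019-12-23 12:00", tz="UTC")

    try:
        pd.Timestamp(ts, tz="US/Eastern")  # tz-aware input plus tz=
    except ValueError as exc:
        print(exc)  # "Cannot pass a datetime or Timestamp with tzinfo ..."

    # The suggested alternative: convert after construction.
    print(ts.tz_convert("US/Eastern"))
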
7 changes: 4 additions & 3 deletions pandas/core/arrays/datetimes.py
@@ -794,16 +794,17 @@ def _add_offset(self, offset):
                 values = self.tz_localize(None)
             else:
                 values = self
-            result = offset.apply_index(values)
-            if self.tz is not None:
-                result = result.tz_localize(self.tz)
+            result = offset.apply_index(values).tz_localize(self.tz)

         except NotImplementedError:
             warnings.warn(
                 "Non-vectorized DateOffset being applied to Series or DatetimeIndex",
                 PerformanceWarning,
             )
             result = self.astype("O") + offset
+            if len(self) == 0:
+                # _from_sequence won't be able to infer self.tz
+                return type(self)._from_sequence(result).tz_localize(self.tz)

         return type(self)._from_sequence(result, freq="infer")

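A sketch of the case the new ``len(self) == 0`` branch fixes (GH 30336), assuming ``CustomBusinessDay`` takes the non-vectorized path as it did at the time:

    import warnings
    import pandas as pd

    idx = pd.DatetimeIndex([], tz="US/Pacific")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # silence the PerformanceWarning
        result = idx + pd.offsets.CustomBusinessDay()

    # Before this fix the empty result came back tz-naive.
    print(result.tz)  # US/Pacific
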
49 changes: 30 additions & 19 deletions pandas/core/frame.py
@@ -37,7 +37,12 @@

 from pandas._libs import algos as libalgos, lib
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
+from pandas.util._decorators import (
+    Appender,
+    Substitution,
+    deprecate_kwarg,
+    rewrite_axis_style_signature,
+)
 from pandas.util._validators import (
     validate_axis_style_args,
     validate_bool_kwarg,
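``deprecate_kwarg``, newly added to this import list, is what implements the ``fname`` -> ``path`` renames later in this file: it remaps the old keyword to the new one and emits a ``FutureWarning``. A rough, hypothetical sketch of the idea, not pandas' actual implementation:

    import functools
    import warnings

    def deprecate_kwarg(old_arg_name, new_arg_name):
        def decorate(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if old_arg_name in kwargs:
                    # Warn, then forward the value under the new name.
                    warnings.warn(
                        f"the {old_arg_name!r} keyword is deprecated, "
                        f"use {new_arg_name!r} instead",
                        FutureWarning,
                        stacklevel=2,
                    )
                    kwargs[new_arg_name] = kwargs.pop(old_arg_name)
                return func(*args, **kwargs)
            return wrapper
        return decorate
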
@@ -1740,7 +1745,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
         rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
                   dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')])

-        >>> index_dtypes = "<S{}".format(df.index.str.len().max())
+        >>> index_dtypes = f"<S{df.index.str.len().max()}"
         >>> df.to_records(index_dtypes=index_dtypes)
         rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
                   dtype=[('I', 'S1'), ('A', '<i8'), ('B', '<f8')])
@@ -1829,9 +1834,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
         mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
         return cls(mgr)

+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_stata(
         self,
-        fname,
+        path,
         convert_dates=None,
         write_index=True,
         byteorder=None,
@@ -1849,11 +1855,16 @@
         Parameters
         ----------
-        fname : str, buffer or path object
+        path : str, buffer or path object
             String, path object (pathlib.Path or py._path.local.LocalPath) or
             object implementing a binary write() function. If using a buffer
             then the buffer will not be automatically closed after the file
             data has been written.
+
+            .. versionchanged:: 1.0.0
+
+            Previously this was "fname"
+
         convert_dates : dict
             Dictionary mapping columns containing datetime types to stata
             internal format to use when writing the dates. Options are 'tc',
@@ -1927,7 +1938,7 @@ def to_stata(
             kwargs["convert_strl"] = convert_strl

         writer = statawriter(
-            fname,
+            path,
             self,
             convert_dates=convert_dates,
             byteorder=byteorder,
@@ -1939,22 +1950,24 @@
         )
         writer.write_file()

-    def to_feather(self, fname):
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
+    def to_feather(self, path):
         """
         Write out the binary feather-format for DataFrames.

         Parameters
         ----------
-        fname : str
+        path : str
             String file path.
         """
         from pandas.io.feather_format import to_feather

-        to_feather(self, fname)
+        to_feather(self, path)

+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,
-        fname,
+        path,
         engine="auto",
         compression="snappy",
         index=None,
@@ -1973,11 +1986,13 @@
         Parameters
         ----------
-        fname : str
+        path : str
             File path or Root Directory path. Will be used as Root Directory
             path while writing a partitioned dataset.

-            .. versionchanged:: 0.24.0
+            .. versionchanged:: 1.0.0
+
+            Previously this was "fname"

         engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
             Parquet library to use. If 'auto', then the option
@@ -2034,7 +2049,7 @@ def to_parquet(

         to_parquet(
             self,
-            fname,
+            path,
             engine,
             compression=compression,
             index=index,
@@ -2340,13 +2355,9 @@ def _sizeof_fmt(num, size_qualifier):
             # returns size in human readable format
             for x in ["bytes", "KB", "MB", "GB", "TB"]:
                 if num < 1024.0:
-                    return "{num:3.1f}{size_q} {x}".format(
-                        num=num, size_q=size_qualifier, x=x
-                    )
+                    return f"{num:3.1f}{size_qualifier} {x}"
                 num /= 1024.0
-            return "{num:3.1f}{size_q} {pb}".format(
-                num=num, size_q=size_qualifier, pb="PB"
-            )
+            return f"{num:3.1f}{size_qualifier} PB"

         if verbose:
             _verbose_repr()
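A standalone version of the helper being converted to f-strings above, to make the loop concrete (a sketch; in pandas it is a closure inside ``DataFrame.info``):

    def _sizeof_fmt(num, size_qualifier=""):
        # returns size in human readable format
        for x in ["bytes", "KB", "MB", "GB", "TB"]:
            if num < 1024.0:
                return f"{num:3.1f}{size_qualifier} {x}"
            num /= 1024.0
        return f"{num:3.1f}{size_qualifier} PB"

    print(_sizeof_fmt(123456789))  # 117.7 MB
    print(_sizeof_fmt(2048, "+"))  # 2.0+ KB
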
@@ -2359,7 +2370,7 @@ def _sizeof_fmt(num, size_qualifier):
             _verbose_repr()

         counts = self._data.get_dtype_counts()
-        dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
+        dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
         lines.append(f"dtypes: {', '.join(dtypes)}")

         if memory_usage is None: