Skip to content

Commit

Permalink
Merge branch 'main' into today_now_error
Browse files Browse the repository at this point in the history
  • Loading branch information
dannyi96 authored Jul 31, 2022
2 parents 5582fd8 + 6e1a040 commit 485615c
Show file tree
Hide file tree
Showing 113 changed files with 1,172 additions and 443 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/32-bit-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
. ~/virtualenvs/pandas-dev/bin/activate && \
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
pip install cython==0.29.30 numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
python setup.py build_ext -q -j2 && \
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
export PANDAS_CI=1 && \
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: CodeQL
on:
schedule:
# every day at midnight
- cron: "0 0 * * *"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
analyze:
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write

strategy:
fail-fast: false
matrix:
language:
- python

steps:
- uses: actions/checkout@v3
- uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
- uses: github/codeql-action/autobuild@v2
- uses: github/codeql-action/analyze@v2
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ repos:
language: python
additional_dependencies:
- flake8==4.0.1
- flake8-pyi==22.5.1
- flake8-pyi==22.7.0
- id: future-annotations
name: import annotations from __future__
entry: 'from __future__ import annotations'
Expand Down
4 changes: 2 additions & 2 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ fi
### DOCSTRINGS ###
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi
Expand Down
26 changes: 15 additions & 11 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,25 @@
# sphinxext.

extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.doctest",
"sphinx.ext.extlinks",
"sphinx.ext.todo",
"numpydoc", # handle NumPy documentation formatted docstrings
"contributors", # custom pandas extension
"IPython.sphinxext.ipython_directive",
"IPython.sphinxext.ipython_console_highlighting",
"matplotlib.sphinxext.plot_directive",
"sphinx.ext.intersphinx",
"numpydoc",
"sphinx_copybutton",
"sphinx_panels",
"sphinx_toggleprompt",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.doctest",
"sphinx.ext.extlinks",
"sphinx.ext.ifconfig",
"sphinx.ext.intersphinx",
"sphinx.ext.linkcode",
"sphinx.ext.mathjax",
"sphinx.ext.todo",
"nbsphinx",
"sphinx_panels",
"contributors", # custom pandas extension
]

exclude_patterns = [
Expand Down Expand Up @@ -144,6 +146,9 @@
# already loads it
panels_add_bootstrap_css = False

# https://sphinx-toggleprompt.readthedocs.io/en/stable/#offset
toggleprompt_offset_right = 35

# Add any paths that contain templates here, relative to this directory.
templates_path = ["../_templates"]

Expand Down Expand Up @@ -453,7 +458,6 @@
# extlinks alias
extlinks = {
"issue": ("https://github.com/pandas-dev/pandas/issues/%s", "GH"),
"wiki": ("https://github.com/pandas-dev/pandas/wiki/%s", "wiki "),
}


Expand Down
10 changes: 5 additions & 5 deletions doc/source/ecosystem.rst
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ A good implementation for Python users is `has2k1/plotnine <https://github.com/h
`IPython Vega <https://github.com/vega/ipyvega>`__ leverages `Vega
<https://github.com/vega/vega>`__ to create plots within Jupyter Notebook.

`Plotly <https://poltly.com/python>`__
`Plotly <https://plotly.com/python>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

`Plotly’s <https://poltly.com/>`__ `Python API <https://poltly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://poltly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://poltly.com/python/offline/>`__, or `on-premise <https://poltly.com/product/enterprise/>`__ accounts for private use.
`Plotly’s <https://plotly.com/>`__ `Python API <https://plotly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://plotly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://plotly.com/python/offline/>`__, or `on-premise <https://plotly.com/product/enterprise/>`__ accounts for private use.

`Lux <https://github.com/lux-org/lux>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -591,12 +591,12 @@ Library Accessor Classes Description
Development tools
-----------------

`pandas-stubs <https://github.com/VirtusLab/pandas-stubs>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
`pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
Install pandas-stubs to enable basic type coverage of pandas API.

Learn more by reading through :issue:`14468`, :issue:`26766`, :issue:`28142`.

See installation and usage instructions on the `github page <https://github.com/VirtusLab/pandas-stubs>`__.
See installation and usage instructions on the `github page <https://github.com/pandas-dev/pandas-stubs>`__.
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ applied to integers, so no ``str`` is used.
Based on the index name of the row (``307``) and the column (``Name``),
we can do a selection using the ``loc`` operator, introduced in the
`tutorial on subsetting <3_subset_data.ipynb>`__.
:ref:`tutorial on subsetting <10min_tut_03_subset>`.

.. raw:: html

Expand Down
2 changes: 1 addition & 1 deletion doc/source/reference/general_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ Importing from other DataFrame libraries
.. autosummary::
:toctree: api/

api.exchange.from_dataframe
api.interchange.from_dataframe
32 changes: 19 additions & 13 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ index_col : int, str, sequence of int / str, or False, optional, default ``None`
string name or column index. If a sequence of int / str is given, a
MultiIndex is used.

Note: ``index_col=False`` can be used to force pandas to *not* use the first
column as the index, e.g. when you have a malformed file with delimiters at
the end of each line.
.. note::
``index_col=False`` can be used to force pandas to *not* use the first
column as the index, e.g. when you have a malformed file with delimiters at
the end of each line.

The default value of ``None`` instructs pandas to guess. If the number of
fields in the column header row is equal to the number of fields in the body
Expand Down Expand Up @@ -182,15 +183,16 @@ General parsing configuration
+++++++++++++++++++++++++++++

dtype : Type name or dict of column -> type, default ``None``
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
(unsupported with ``engine='python'``). Use ``str`` or ``object`` together
with suitable ``na_values`` settings to preserve and
not interpret dtype.
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}``
Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve
and not interpret dtype. If converters are specified, they will be applied INSTEAD
of dtype conversion.

.. versionadded:: 1.5.0

Support for defaultdict was added. Specify a defaultdict as input where
the default determines the dtype of the columns which are not explicitly
listed.
Support for defaultdict was added. Specify a defaultdict as input where
the default determines the dtype of the columns which are not explicitly
listed.
engine : {``'c'``, ``'python'``, ``'pyarrow'``}
Parser engine to use. The C and pyarrow engines are faster, while the python engine
is currently more feature-complete. Multithreading is currently only supported by
Expand Down Expand Up @@ -283,7 +285,9 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
* If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date
column.
* If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'.
A fast-path exists for iso8601-formatted dates.

.. note::
A fast-path exists for iso8601-formatted dates.
infer_datetime_format : boolean, default ``False``
If ``True`` and parse_dates is enabled for a column, attempt to infer the
datetime format to speed up the processing.
Expand Down Expand Up @@ -1593,8 +1597,10 @@ of multi-columns indices.
pd.read_csv("mi2.csv", header=[0, 1], index_col=0)
Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will be *lost*.
.. note::
If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will
be *lost*.

.. ipython:: python
:suppress:
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ Other Deprecations
- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
- Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on these methods (:issue:`47728`)
- Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)

.. ---------------------------------------------------------------------------
.. _whatsnew_150.performance:
Expand Down Expand Up @@ -802,6 +803,7 @@ Performance improvements
- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`)
- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`)
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`)
- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1023,6 +1025,7 @@ Reshaping
- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`)
- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`)

Sparse
^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ dependencies:
- pytest-cython # doctest
- sphinx
- sphinx-panels
- sphinx-copybutton
- types-python-dateutil
- types-PyMySQL
- types-pytz
Expand All @@ -128,3 +129,4 @@ dependencies:
- jupyterlab >=3.4,<4
- pip:
- jupyterlite==0.1.0b10
- sphinx-toggleprompt
5 changes: 3 additions & 2 deletions pandas/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,9 @@ class RegisteredOption(NamedTuple):

class OptionError(AttributeError, KeyError):
"""
Exception for pandas.options, backwards compatible with KeyError
checks.
Exception raised for pandas.options.
Backwards compatible with KeyError checks.
"""


Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/algos.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import annotations

from typing import Any

import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def group_any_all(
val_test: Literal["any", "all"],
skipna: bool,
) -> None: ...
def group_add(
def group_sum(
out: np.ndarray, # complexfloating_t[:, ::1]
counts: np.ndarray, # int64_t[::1]
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]
Expand Down
30 changes: 15 additions & 15 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def group_median_float64(
ndarray[intp_t] indexer
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
assert min_count == -1, "'min_count' only used in sum and prod"

ngroups = len(counts)
N, K = (<object>values).shape
Expand Down Expand Up @@ -502,7 +502,7 @@ def group_any_all(


# ----------------------------------------------------------------------
# group_add, group_prod, group_var, group_mean, group_ohlc
# group_sum, group_prod, group_var, group_mean, group_ohlc
# ----------------------------------------------------------------------

ctypedef fused mean_t:
Expand All @@ -511,17 +511,17 @@ ctypedef fused mean_t:
complex64_t
complex128_t

ctypedef fused add_t:
ctypedef fused sum_t:
mean_t
object


@cython.wraparound(False)
@cython.boundscheck(False)
def group_add(
add_t[:, ::1] out,
def group_sum(
sum_t[:, ::1] out,
int64_t[::1] counts,
ndarray[add_t, ndim=2] values,
ndarray[sum_t, ndim=2] values,
const intp_t[::1] labels,
Py_ssize_t min_count=0,
bint is_datetimelike=False,
Expand All @@ -531,8 +531,8 @@ def group_add(
"""
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
add_t val, t, y
add_t[:, ::1] sumx, compensation
sum_t val, t, y
sum_t[:, ::1] sumx, compensation
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

Expand All @@ -546,7 +546,7 @@ def group_add(

N, K = (<object>values).shape

if add_t is object:
if sum_t is object:
# NB: this does not use 'compensation' like the non-object track does.
for i in range(N):
lab = labels[i]
Expand Down Expand Up @@ -588,10 +588,10 @@ def group_add(

# not nan
# With dt64/td64 values, values have been cast to float64
# instead if int64 for group_add, but the logic
# instead if int64 for group_sum, but the logic
# is otherwise the same as in _treat_as_na
if val == val and not (
add_t is float64_t
sum_t is float64_t
and is_datetimelike
and val == <float64_t>NPY_NAT
):
Expand Down Expand Up @@ -677,7 +677,7 @@ def group_var(
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"
assert min_count == -1, "'min_count' only used in sum and prod"

if len_values != len_labels:
raise ValueError("len(index) != len(labels)")
Expand Down Expand Up @@ -745,7 +745,7 @@ def group_mean(
Array containing unique label for each group, with its
ordering matching up to the corresponding record in `values`.
min_count : Py_ssize_t
Only used in add and prod. Always -1.
Only used in sum and prod. Always -1.
is_datetimelike : bool
True if `values` contains datetime-like entries.
mask : ndarray[bool, ndim=2], optional
Expand All @@ -766,7 +766,7 @@ def group_mean(
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"
assert min_count == -1, "'min_count' only used in sum and prod"

if len_values != len_labels:
raise ValueError("len(index) != len(labels)")
Expand Down Expand Up @@ -821,7 +821,7 @@ def group_ohlc(
Py_ssize_t i, j, N, K, lab
floating val

assert min_count == -1, "'min_count' only used in add and prod"
assert min_count == -1, "'min_count' only used in sum and prod"

if len(labels) == 0:
return
Expand Down
Loading

0 comments on commit 485615c

Please sign in to comment.